Transfer Learning with SVD operator

me1234
me1234 New Altair Community Member
edited November 5 in Community Q&A

Hi, RapidMiner community,

 

For a university project, I'm working on transfer learning. I am building a model that learns from a source dataset (Amazon text reviews with a star rating) and uses this knowledge to predict the star ratings on the target dataset. The model should also combine information from the source and the target to optimize the predictions, and it should reduce the dimensionality with the SVD operator.

 

In the code that I added, there are three SVD operators: one for the source data, one for the target training data and one for the target test data. I'm not sure how to connect the preprocessing model ("pre") ports. In the code example I added a few Apply Model operators, but this gives the error "Wrong number of attributes, operator expects a fixed number of attributes (100), the actual number was 100".

 

I also tried adding an extra Group Models operator to combine the SVD preprocessing models, but that didn't work either.

 

Who can help me?

 

 

<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Reads a review CSV file: semicolon-separated, double-quoted, windows-1252 encoded.
       Row 0 is annotated as the attribute-name row; the column types are fixed in
       data_set_meta_data_information below. -->
  <operator activated="true" class="read_csv" compatibility="8.1.000" expanded="true"
            height="68" name="Read Health&amp;PC" width="90" x="514" y="136">
    <!-- NOTE(review): absolute local Windows path; adjust before running elsewhere. -->
    <parameter key="csv_file" value="F:\CloudStation\Studie Mikael\VAF\DATA\amahpc-right-10k.csv"/>
    <parameter key="column_separators" value=";"/>
    <parameter key="trim_lines" value="false"/>
    <parameter key="use_quotes" value="true"/>
    <parameter key="quotes_character" value="&quot;"/>
    <parameter key="escape_character" value="\"/>
    <parameter key="skip_comments" value="false"/>
    <parameter key="comment_characters" value="#"/>
    <parameter key="parse_numbers" value="true"/>
    <parameter key="decimal_character" value="."/>
    <parameter key="grouped_digits" value="false"/>
    <parameter key="grouping_character" value=","/>
    <parameter key="date_format" value=""/>
    <parameter key="first_row_as_names" value="false"/>
    <list key="annotations">
      <parameter key="0" value="Name"/>
    </list>
    <parameter key="time_zone" value="SYSTEM"/>
    <parameter key="locale" value="English (United States)"/>
    <parameter key="encoding" value="windows-1252"/>
    <parameter key="read_all_values_as_polynominal" value="false"/>
    <!-- Column index mapped to name.selected.type.role; reviewText and overall are polynominal. -->
    <list key="data_set_meta_data_information">
      <parameter key="0" value="att1.true.real.attribute"/>
      <parameter key="1" value="asin.true.polynominal.attribute"/>
      <parameter key="2" value="reviewerName.true.polynominal.attribute"/>
      <parameter key="3" value="helpful.true.real.attribute"/>
      <parameter key="4" value="reviewText.true.polynominal.attribute"/>
      <parameter key="5" value="overall.true.polynominal.attribute"/>
      <parameter key="6" value="summary.true.polynominal.attribute"/>
      <parameter key="7" value="unixReviewTime.true.real.attribute"/>
      <parameter key="8" value="reviewTime.true.polynominal.attribute"/>
      <parameter key="9" value="X1.true.real.attribute"/>
      <parameter key="10" value="X2.true.real.attribute"/>
    </list>
    <parameter key="read_not_matching_values_as_missings" value="true"/>
    <parameter key="datamanagement" value="double_array"/>
    <parameter key="data_management" value="auto"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Pre-processing subprocess. In order: maps the "overall" values 1.0..5.0 onto
       negative / neutral / positive, keeps only reviewText and overall, declares "?" as
       missing, drops incomplete rows, sets overall as the label and converts reviewText
       to the text type. -->
  <operator activated="true" class="subprocess" compatibility="8.1.003" expanded="true"
            height="82" name="Pre-processing" width="90" x="648" y="136">
    <process expanded="true">
      <!-- Label mapping: 1.0 and 2.0 become negative, 3.0 neutral, 4.0 and 5.0 positive. -->
      <operator activated="true" class="replace" compatibility="8.1.003" expanded="true"
                height="82" name="Replace 1.0 &gt; negative" width="90" x="179" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="overall"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="replace_what" value="1.0"/>
        <parameter key="replace_by" value="negative"/>
      </operator>
      <operator activated="true" class="replace" compatibility="8.1.003" expanded="true"
                height="82" name="Replace 2.0 &gt; negative" width="90" x="313" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="overall"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="replace_what" value="2.0"/>
        <parameter key="replace_by" value="negative"/>
      </operator>
      <operator activated="true" class="replace" compatibility="8.1.003" expanded="true"
                height="82" name="Replace 3.0 &gt; neutral" width="90" x="447" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="overall"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="replace_what" value="3.0"/>
        <parameter key="replace_by" value="neutral"/>
      </operator>
      <operator activated="true" class="replace" compatibility="8.1.003" expanded="true"
                height="82" name="Replace 4.0 &gt; positive" width="90" x="581" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="overall"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="replace_what" value="4.0"/>
        <parameter key="replace_by" value="positive"/>
      </operator>
      <operator activated="true" class="replace" compatibility="8.1.003" expanded="true"
                height="82" name="Replace 5.0 &gt; positive" width="90" x="715" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="overall"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="replace_what" value="5.0"/>
        <parameter key="replace_by" value="positive"/>
      </operator>
      <!-- Keep only the review text and the rating. -->
      <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true"
                height="82" name="Select Attributes" width="90" x="380" y="136">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="reviewText|overall"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Treat the nominal value "?" as a missing value in all attributes. -->
      <operator activated="true" class="declare_missing_value" compatibility="8.1.003" expanded="true"
                height="82" name="Declare Missing Value" width="90" x="514" y="136">
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="mode" value="nominal"/>
        <parameter key="nominal_value" value="?"/>
        <parameter key="expression_value" value=""/>
      </operator>
      <!-- Drop a row when overall OR reviewText is missing. Because invert_filter is true,
           the two is_missing conditions must be OR-combined (filters_logic_and=false);
           AND-combining them only removed rows in which both values were missing, so rows
           without a label or without text still reached the later steps. -->
      <operator activated="true" class="filter_examples" compatibility="8.1.003" expanded="true"
                height="103" name="Filter Examples" width="90" x="648" y="136">
        <parameter key="parameter_expression" value=""/>
        <parameter key="condition_class" value="custom_filters"/>
        <parameter key="invert_filter" value="true"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="overall.is_missing."/>
          <parameter key="filters_entry_key" value="reviewText.is_missing."/>
        </list>
        <parameter key="filters_logic_and" value="false"/>
        <parameter key="filters_check_metadata" value="true"/>
      </operator>
      <!-- The mapped rating is the prediction target. -->
      <operator activated="true" class="set_role" compatibility="8.1.003" expanded="true"
                height="82" name="Set Role" width="90" x="782" y="136">
        <parameter key="attribute_name" value="overall"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- reviewText was read as polynominal; convert it to the text type. -->
      <operator activated="true" class="nominal_to_text" compatibility="8.1.003" expanded="true"
                height="82" name="Nominal to Text" width="90" x="916" y="136">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="reviewText"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Linear chain from "in 1" through every operator above to "out 1". -->
      <connect from_port="in 1" to_op="Replace 1.0 &gt; negative" to_port="example set input"/>
      <connect from_op="Replace 1.0 &gt; negative" from_port="example set output" to_op="Replace 2.0 &gt; negative" to_port="example set input"/>
      <connect from_op="Replace 2.0 &gt; negative" from_port="example set output" to_op="Replace 3.0 &gt; neutral" to_port="example set input"/>
      <connect from_op="Replace 3.0 &gt; neutral" from_port="example set output" to_op="Replace 4.0 &gt; positive" to_port="example set input"/>
      <connect from_op="Replace 4.0 &gt; positive" from_port="example set output" to_op="Replace 5.0 &gt; positive" to_port="example set input"/>
      <connect from_op="Replace 5.0 &gt; positive" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Declare Missing Value" to_port="example set input"/>
      <connect from_op="Declare Missing Value" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
      <connect from_op="Nominal to Text" from_port="example set output" to_port="out 1"/>
      <portSpacing port="source_in 1" spacing="0"/>
      <portSpacing port="source_in 2" spacing="0"/>
      <portSpacing port="sink_out 1" spacing="0"/>
      <portSpacing port="sink_out 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- SMOTE upsampling from the Operator Toolbox extension: 5 neighbours, normalization on,
       classes equalized, minority class auto-detected. No local random seed is used. -->
  <operator activated="true" class="operator_toolbox:smote" compatibility="1.0.000" expanded="true"
            height="82" name="Smote Upsampling" width="90" x="782" y="85">
    <parameter key="number_of_neighbours" value="5"/>
    <parameter key="normalize" value="true"/>
    <parameter key="equalize_classes" value="true"/>
    <!-- NOTE(review): presumably not used while equalize_classes is true; confirm. -->
    <parameter key="upsampling_size" value="1000"/>
    <parameter key="auto_detect_minority_class" value="true"/>
    <parameter key="round_integers" value="true"/>
    <parameter key="nominal_change_rate" value="0.5"/>
    <parameter key="use_local_random_seed" value="false"/>
    <parameter key="local_random_seed" value="1992"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Relative sampling with balance_data enabled. The per-class ratio list keeps all
       negative and neutral examples (1.0) and 30 percent of the positive ones (0.3). -->
  <operator activated="true" class="sample" compatibility="8.1.003" expanded="true"
            height="82" name="Sample" width="90" x="782" y="187">
    <parameter key="sample" value="relative"/>
    <parameter key="balance_data" value="true"/>
    <parameter key="sample_size" value="100"/>
    <parameter key="sample_ratio" value="0.1"/>
    <parameter key="sample_probability" value="0.1"/>
    <!-- NOTE(review): presumably only read in absolute mode; confirm. -->
    <list key="sample_size_per_class">
      <parameter key="positive" value="1000"/>
      <parameter key="negative" value="1000"/>
      <parameter key="neutral" value="1000"/>
    </list>
    <list key="sample_ratio_per_class">
      <parameter key="negative" value="1.0"/>
      <parameter key="positive" value="0.3"/>
      <parameter key="neutral" value="1.0"/>
    </list>
    <list key="sample_probability_per_class"/>
    <parameter key="use_local_random_seed" value="false"/>
    <parameter key="local_random_seed" value="1992"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Builds a TF-IDF word vector from the text attribute(s). Percentual pruning is on
       (below 1.0 percent / above 30.0 percent) and the original text is not kept. -->
  <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000"
            expanded="true" height="82" name="Process Documents from Data" width="90" x="916" y="187">
    <parameter key="create_word_vector" value="true"/>
    <parameter key="vector_creation" value="TF-IDF"/>
    <parameter key="add_meta_information" value="true"/>
    <parameter key="keep_text" value="false"/>
    <parameter key="prune_method" value="percentual"/>
    <parameter key="prune_below_percent" value="1.0"/>
    <parameter key="prune_above_percent" value="30.0"/>
    <parameter key="prune_below_rank" value="0.05"/>
    <parameter key="prune_above_rank" value="0.95"/>
    <parameter key="datamanagement" value="double_sparse_array"/>
    <parameter key="data_management" value="auto"/>
    <parameter key="select_attributes_and_weights" value="false"/>
    <list key="specify_weights"/>
    <process expanded="true">
      <!-- Per-document chain: tokenize on non-letters, Porter stemming, English stopword
           filter, lower-casing, then keep tokens of 2 to 15 characters.
           NOTE(review): stemming and stopword filtering run before lower-casing here;
           consider lower-casing first and stemming last, and apply the same order to any
           parallel text pipeline so the vocabularies stay comparable. -->
      <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true"
                height="68" name="Tokenize" width="90" x="179" y="34">
        <parameter key="mode" value="non letters"/>
        <parameter key="characters" value=".:"/>
        <parameter key="language" value="English"/>
        <parameter key="max_token_length" value="3"/>
      </operator>
      <operator activated="true" class="text:stem_porter" compatibility="8.1.000" expanded="true"
                height="68" name="Stem (Porter)" width="90" x="313" y="34"/>
      <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000"
                expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="447" y="34"/>
      <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true"
                height="68" name="Transform Cases" width="90" x="581" y="34">
        <parameter key="transform_to" value="lower case"/>
      </operator>
      <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true"
                height="68" name="Filter Tokens (by Length)" width="90" x="782" y="34">
        <parameter key="min_chars" value="2"/>
        <parameter key="max_chars" value="15"/>
      </operator>
      <connect from_port="document" to_op="Tokenize" to_port="document"/>
      <connect from_op="Tokenize" from_port="document" to_op="Stem (Porter)" to_port="document"/>
      <connect from_op="Stem (Porter)" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
      <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Transform Cases" to_port="document"/>
      <connect from_op="Transform Cases" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
      <connect from_op="Filter Tokens (by Length)" from_port="document" to_port="document 1"/>
      <portSpacing port="source_document" spacing="0"/>
      <portSpacing port="sink_document 1" spacing="0"/>
      <portSpacing port="sink_document 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Singular value decomposition configured for a fixed number of 100 dimensions.
       NOTE(review): an SVD fitted separately on each dataset presumably yields component
       spaces that are not comparable. To project other data into this space, apply this
       operator's preprocessing model to data with the same input attributes instead of
       fitting a new SVD. Verify against the process wiring, which is not part of this
       fragment. -->
  <operator activated="true" class="singular_value_decomposition" compatibility="8.1.003"
            expanded="true" height="103" name="SVD" width="90" x="1050" y="136">
    <parameter key="dimensionality_reduction" value="fixed number"/>
    <!-- NOTE(review): presumably ignored in "fixed number" mode; confirm. -->
    <parameter key="percentage_threshold" value="0.95"/>
    <parameter key="dimensions" value="100"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Reads a review CSV file: semicolon-separated, double-quoted, windows-1252 encoded.
       Row 0 is annotated as the attribute-name row; the column types are fixed in
       data_set_meta_data_information below. -->
  <operator activated="true" class="read_csv" compatibility="8.1.003" expanded="true"
            height="68" name="Read Beauty" width="90" x="514" y="289">
    <!-- NOTE(review): absolute local Windows path; adjust before running elsewhere. -->
    <parameter key="csv_file" value="F:\CloudStation\Studie Mikael\VAF\DATA\amabea-right-10k.csv"/>
    <parameter key="column_separators" value=";"/>
    <parameter key="trim_lines" value="false"/>
    <parameter key="use_quotes" value="true"/>
    <parameter key="quotes_character" value="&quot;"/>
    <parameter key="escape_character" value="\"/>
    <parameter key="skip_comments" value="false"/>
    <parameter key="comment_characters" value="#"/>
    <parameter key="parse_numbers" value="true"/>
    <parameter key="decimal_character" value="."/>
    <parameter key="grouped_digits" value="false"/>
    <parameter key="grouping_character" value=","/>
    <parameter key="date_format" value=""/>
    <parameter key="first_row_as_names" value="false"/>
    <list key="annotations">
      <parameter key="0" value="Name"/>
    </list>
    <parameter key="time_zone" value="SYSTEM"/>
    <parameter key="locale" value="English (United States)"/>
    <parameter key="encoding" value="windows-1252"/>
    <parameter key="read_all_values_as_polynominal" value="false"/>
    <!-- Column index mapped to name.selected.type.role; reviewText and overall are polynominal. -->
    <list key="data_set_meta_data_information">
      <parameter key="0" value="att1.true.real.attribute"/>
      <parameter key="1" value="reviewerID.true.polynominal.attribute"/>
      <parameter key="2" value="summary.true.polynominal.attribute"/>
      <parameter key="3" value="reviewerName.true.polynominal.attribute"/>
      <parameter key="4" value="unixReviewTime.true.real.attribute"/>
      <parameter key="5" value="reviewText.true.polynominal.attribute"/>
      <parameter key="6" value="overall.true.polynominal.attribute"/>
      <parameter key="7" value="helpful.true.real.attribute"/>
      <parameter key="8" value="asin.true.polynominal.attribute"/>
      <parameter key="9" value="X1.true.real.attribute"/>
      <parameter key="10" value="X2.true.real.attribute"/>
    </list>
    <parameter key="read_not_matching_values_as_missings" value="true"/>
    <parameter key="datamanagement" value="double_array"/>
    <parameter key="data_management" value="auto"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Pre-processing (2) subprocess. In order: maps the "overall" values 1.0..5.0 onto
       negative / neutral / positive, declares "?" as missing, drops incomplete rows, keeps
       only reviewText and overall, sets overall as the label and converts reviewText to
       the text type. -->
  <operator activated="true" class="subprocess" compatibility="8.1.003" expanded="true"
            height="82" name="Pre-processing (2)" width="90" x="648" y="289">
    <process expanded="true">
      <!-- Label mapping: 1.0 and 2.0 become negative, 3.0 neutral, 4.0 and 5.0 positive. -->
      <operator activated="true" class="replace" compatibility="8.1.003" expanded="true"
                height="82" name="Replace 1.0 &gt; negative (2)" width="90" x="45" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="overall"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="replace_what" value="1.0"/>
        <parameter key="replace_by" value="negative"/>
      </operator>
      <operator activated="true" class="replace" compatibility="8.1.003" expanded="true"
                height="82" name="Replace 2.0 &gt; negative (2)" width="90" x="179" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="overall"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="replace_what" value="2.0"/>
        <parameter key="replace_by" value="negative"/>
      </operator>
      <operator activated="true" class="replace" compatibility="8.1.003" expanded="true"
                height="82" name="Replace 3.0 &gt; neutral (2)" width="90" x="313" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="overall"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="replace_what" value="3.0"/>
        <parameter key="replace_by" value="neutral"/>
      </operator>
      <operator activated="true" class="replace" compatibility="8.1.003" expanded="true"
                height="82" name="Replace 4.0 &gt; positive (2)" width="90" x="447" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="overall"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="replace_what" value="4.0"/>
        <parameter key="replace_by" value="positive"/>
      </operator>
      <operator activated="true" class="replace" compatibility="8.1.003" expanded="true"
                height="82" name="Replace 5.0 &gt; positive (2)" width="90" x="581" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="overall"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="replace_what" value="5.0"/>
        <parameter key="replace_by" value="positive"/>
      </operator>
      <!-- Treat the nominal value "?" as a missing value in all attributes. -->
      <operator activated="true" class="declare_missing_value" compatibility="8.1.003" expanded="true"
                height="82" name="Declare Missing Value (2)" width="90" x="246" y="187">
        <parameter key="attribute_filter_type" value="all"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
        <parameter key="mode" value="nominal"/>
        <parameter key="nominal_value" value="?"/>
        <parameter key="expression_value" value=""/>
      </operator>
      <!-- Drop a row when overall OR reviewText is missing. Because invert_filter is true,
           the two is_missing conditions must be OR-combined (filters_logic_and=false);
           AND-combining them only removed rows in which both values were missing, so rows
           without a label or without text still reached the later steps. -->
      <operator activated="true" class="filter_examples" compatibility="8.1.003" expanded="true"
                height="103" name="Filter Examples (2)" width="90" x="380" y="187">
        <parameter key="parameter_expression" value=""/>
        <parameter key="condition_class" value="custom_filters"/>
        <parameter key="invert_filter" value="true"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="overall.is_missing."/>
          <parameter key="filters_entry_key" value="reviewText.is_missing."/>
        </list>
        <parameter key="filters_logic_and" value="false"/>
        <parameter key="filters_check_metadata" value="true"/>
      </operator>
      <!-- Keep only the review text and the rating. -->
      <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true"
                height="82" name="Select Attributes (2)" width="90" x="514" y="187">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value=""/>
        <parameter key="attributes" value="reviewText|overall"/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="attribute_value"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="time"/>
        <parameter key="block_type" value="attribute_block"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="value_matrix_row_start"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- The mapped rating is the prediction target. -->
      <operator activated="true" class="set_role" compatibility="8.1.003" expanded="true"
                height="82" name="Set Role (2)" width="90" x="648" y="187">
        <parameter key="attribute_name" value="overall"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- reviewText was read as polynominal; convert it to the text type. -->
      <operator activated="true" class="nominal_to_text" compatibility="8.1.003" expanded="true"
                height="82" name="Nominal to Text (2)" width="90" x="782" y="187">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="reviewText"/>
        <parameter key="attributes" value=""/>
        <parameter key="use_except_expression" value="false"/>
        <parameter key="value_type" value="nominal"/>
        <parameter key="use_value_type_exception" value="false"/>
        <parameter key="except_value_type" value="file_path"/>
        <parameter key="block_type" value="single_value"/>
        <parameter key="use_block_type_exception" value="false"/>
        <parameter key="except_block_type" value="single_value"/>
        <parameter key="invert_selection" value="false"/>
        <parameter key="include_special_attributes" value="false"/>
      </operator>
      <!-- Linear chain from "in 1" through every operator above to "out 1". -->
      <connect from_port="in 1" to_op="Replace 1.0 &gt; negative (2)" to_port="example set input"/>
      <connect from_op="Replace 1.0 &gt; negative (2)" from_port="example set output" to_op="Replace 2.0 &gt; negative (2)" to_port="example set input"/>
      <connect from_op="Replace 2.0 &gt; negative (2)" from_port="example set output" to_op="Replace 3.0 &gt; neutral (2)" to_port="example set input"/>
      <connect from_op="Replace 3.0 &gt; neutral (2)" from_port="example set output" to_op="Replace 4.0 &gt; positive (2)" to_port="example set input"/>
      <connect from_op="Replace 4.0 &gt; positive (2)" from_port="example set output" to_op="Replace 5.0 &gt; positive (2)" to_port="example set input"/>
      <connect from_op="Replace 5.0 &gt; positive (2)" from_port="example set output" to_op="Declare Missing Value (2)" to_port="example set input"/>
      <connect from_op="Declare Missing Value (2)" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
      <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
      <connect from_op="Set Role (2)" from_port="example set output" to_op="Nominal to Text (2)" to_port="example set input"/>
      <connect from_op="Nominal to Text (2)" from_port="example set output" to_port="out 1"/>
      <portSpacing port="source_in 1" spacing="0"/>
      <portSpacing port="source_in 2" spacing="0"/>
      <portSpacing port="sink_out 1" spacing="0"/>
      <portSpacing port="sink_out 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Splits the incoming example set into two partitions with ratios 0.7 and 0.3, using
       automatic sampling. No local random seed is used. -->
  <operator activated="true" class="split_data" compatibility="8.1.003" expanded="true"
            height="103" name="Split Data" width="90" x="648" y="442">
    <enumeration key="partitions">
      <parameter key="ratio" value="0.7"/>
      <parameter key="ratio" value="0.3"/>
    </enumeration>
    <parameter key="sampling_type" value="automatic"/>
    <parameter key="use_local_random_seed" value="false"/>
    <parameter key="local_random_seed" value="1992"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- SMOTE upsampling from the Operator Toolbox extension: 5 neighbours, normalization on,
       classes equalized, minority class auto-detected. No local random seed is used. -->
  <operator activated="true" class="operator_toolbox:smote" compatibility="1.0.000" expanded="true"
            height="82" name="Smote Upsampling (2)" width="90" x="782" y="289">
    <parameter key="number_of_neighbours" value="5"/>
    <parameter key="normalize" value="true"/>
    <parameter key="equalize_classes" value="true"/>
    <!-- NOTE(review): presumably not used while equalize_classes is true; confirm. -->
    <parameter key="upsampling_size" value="1000"/>
    <parameter key="auto_detect_minority_class" value="true"/>
    <parameter key="round_integers" value="true"/>
    <parameter key="nominal_change_rate" value="0.5"/>
    <parameter key="use_local_random_seed" value="false"/>
    <parameter key="local_random_seed" value="1992"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Relative sampling with balance_data enabled. The per-class ratio list keeps all
       negative and neutral examples (1.0) and 30 percent of the positive ones (0.3). -->
  <operator activated="true" class="sample" compatibility="8.1.003" expanded="true"
            height="82" name="Sample (2)" width="90" x="782" y="391">
    <parameter key="sample" value="relative"/>
    <parameter key="balance_data" value="true"/>
    <parameter key="sample_size" value="100"/>
    <parameter key="sample_ratio" value="0.1"/>
    <parameter key="sample_probability" value="0.1"/>
    <!-- NOTE(review): presumably only read in absolute mode; confirm. -->
    <list key="sample_size_per_class">
      <parameter key="positive" value="1000"/>
      <parameter key="negative" value="1000"/>
      <parameter key="neutral" value="1000"/>
    </list>
    <list key="sample_ratio_per_class">
      <parameter key="negative" value="1.0"/>
      <parameter key="positive" value="0.3"/>
      <parameter key="neutral" value="1.0"/>
    </list>
    <list key="sample_probability_per_class"/>
    <parameter key="use_local_random_seed" value="false"/>
    <parameter key="local_random_seed" value="1992"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Builds a TF-IDF word vector from the text attributes, pruning terms by percentage
       (below 1.0, above 30.0). The original text is not kept. -->
  <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (2)" width="90" x="916" y="340">
    <parameter key="create_word_vector" value="true"/>
    <parameter key="vector_creation" value="TF-IDF"/>
    <parameter key="add_meta_information" value="true"/>
    <parameter key="keep_text" value="false"/>
    <parameter key="prune_method" value="percentual"/>
    <parameter key="prune_below_percent" value="1.0"/>
    <parameter key="prune_above_percent" value="30.0"/>
    <parameter key="prune_below_rank" value="0.05"/>
    <parameter key="prune_above_rank" value="0.95"/>
    <parameter key="datamanagement" value="double_sparse_array"/>
    <parameter key="data_management" value="auto"/>
    <parameter key="select_attributes_and_weights" value="false"/>
    <list key="specify_weights"/>
    <!-- Per-document pipeline: tokenize, stem (Porter), filter English stopwords,
         lower-case, then keep tokens of 2 to 15 characters.
         NOTE(review): stopword filtering runs before lower-casing; confirm that
         capitalised stopwords are still removed as intended. -->
    <process expanded="true">
      <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="179" y="34">
        <parameter key="mode" value="non letters"/>
        <parameter key="characters" value=".:"/>
        <parameter key="language" value="English"/>
        <parameter key="max_token_length" value="3"/>
      </operator>
      <operator activated="true" class="text:stem_porter" compatibility="8.1.000" expanded="true" height="68" name="Stem (2)" width="90" x="380" y="34"/>
      <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (2)" width="90" x="581" y="34"/>
      <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases (2)" width="90" x="715" y="34">
        <parameter key="transform_to" value="lower case"/>
      </operator>
      <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (2)" width="90" x="849" y="34">
        <parameter key="min_chars" value="2"/>
        <parameter key="max_chars" value="15"/>
      </operator>
      <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
      <connect from_op="Tokenize (2)" from_port="document" to_op="Stem (2)" to_port="document"/>
      <connect from_op="Stem (2)" from_port="document" to_op="Filter Stopwords (2)" to_port="document"/>
      <connect from_op="Filter Stopwords (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
      <connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Tokens (2)" to_port="document"/>
      <connect from_op="Filter Tokens (2)" from_port="document" to_port="document 1"/>
      <portSpacing port="source_document" spacing="0"/>
      <portSpacing port="sink_document 1" spacing="0"/>
      <portSpacing port="sink_document 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Singular value decomposition reducing the input to a fixed number of 100 dimensions. -->
  <operator activated="true" class="singular_value_decomposition" compatibility="8.1.003" expanded="true" height="103" name="SVD (2)" width="90" x="1050" y="289">
    <parameter key="dimensionality_reduction" value="fixed number"/>
    <parameter key="percentage_threshold" value="0.95"/>
    <parameter key="dimensions" value="100"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- 2-fold cross validation. Training side: k-NN with k=1 (Naive Bayes and GLM are present
       but deactivated). Testing side: apply the model and measure classification performance. -->
  <operator activated="true" class="concurrency:cross_validation" compatibility="8.1.003" expanded="true" height="145" name="Cross Validation (2)" width="90" x="1184" y="289">
    <parameter key="split_on_batch_attribute" value="false"/>
    <parameter key="leave_one_out" value="false"/>
    <parameter key="number_of_folds" value="2"/>
    <parameter key="sampling_type" value="automatic"/>
    <parameter key="use_local_random_seed" value="false"/>
    <parameter key="local_random_seed" value="1992"/>
    <parameter key="enable_parallel_execution" value="true"/>
    <!-- Training subprocess: only k-NN is connected to the model output. -->
    <process expanded="true">
      <operator activated="true" class="k_nn" compatibility="8.1.003" expanded="true" height="82" name="k-NN (2)" width="90" x="179" y="34">
        <parameter key="k" value="1"/>
        <parameter key="weighted_vote" value="false"/>
        <parameter key="measure_types" value="MixedMeasures"/>
        <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
        <parameter key="nominal_measure" value="NominalDistance"/>
        <parameter key="numerical_measure" value="EuclideanDistance"/>
        <parameter key="divergence" value="GeneralizedIDivergence"/>
        <parameter key="kernel_type" value="radial"/>
        <parameter key="kernel_gamma" value="1.0"/>
        <parameter key="kernel_sigma1" value="1.0"/>
        <parameter key="kernel_sigma2" value="0.0"/>
        <parameter key="kernel_sigma3" value="2.0"/>
        <parameter key="kernel_degree" value="3.0"/>
        <parameter key="kernel_shift" value="1.0"/>
        <parameter key="kernel_a" value="1.0"/>
        <parameter key="kernel_b" value="0.0"/>
      </operator>
      <operator activated="false" class="naive_bayes" compatibility="8.1.003" expanded="true" height="82" name="Naive Bayes (2)" width="90" x="179" y="238">
        <parameter key="laplace_correction" value="true"/>
      </operator>
      <operator activated="false" class="h2o:generalized_linear_model" compatibility="7.6.001" expanded="true" height="124" name="Generalized Linear Model (2)" width="90" x="179" y="340">
        <parameter key="family" value="AUTO"/>
        <parameter key="link" value="family_default"/>
        <parameter key="solver" value="AUTO"/>
        <parameter key="reproducible" value="false"/>
        <parameter key="maximum_number_of_threads" value="4"/>
        <parameter key="use_regularization" value="true"/>
        <parameter key="lambda_search" value="false"/>
        <parameter key="number_of_lambdas" value="0"/>
        <parameter key="lambda_min_ratio" value="0.0"/>
        <parameter key="early_stopping" value="true"/>
        <parameter key="stopping_rounds" value="3"/>
        <parameter key="stopping_tolerance" value="0.001"/>
        <parameter key="standardize" value="true"/>
        <parameter key="non-negative_coefficients" value="false"/>
        <parameter key="add_intercept" value="true"/>
        <parameter key="compute_p-values" value="false"/>
        <parameter key="remove_collinear_columns" value="false"/>
        <parameter key="missing_values_handling" value="MeanImputation"/>
        <parameter key="max_iterations" value="0"/>
        <parameter key="specify_beta_constraints" value="false"/>
        <list key="beta_constraints"/>
        <parameter key="max_runtime_seconds" value="0"/>
        <list key="expert_parameters"/>
      </operator>
      <connect from_port="training set" to_op="k-NN (2)" to_port="training set"/>
      <connect from_op="k-NN (2)" from_port="model" to_port="model"/>
      <portSpacing port="source_training set" spacing="0"/>
      <portSpacing port="sink_model" spacing="0"/>
      <portSpacing port="sink_through 1" spacing="0"/>
    </process>
    <!-- Testing subprocess: Apply Model feeds Performance, whose result goes to "performance 1". -->
    <process expanded="true">
      <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (4)" width="90" x="112" y="34">
        <list key="application_parameters"/>
        <parameter key="create_view" value="false"/>
      </operator>
      <operator activated="true" class="performance_classification" compatibility="8.1.003" expanded="true" height="82" name="Performance training (2)" width="90" x="246" y="34">
        <parameter key="main_criterion" value="accuracy"/>
        <parameter key="accuracy" value="true"/>
        <parameter key="classification_error" value="true"/>
        <parameter key="kappa" value="false"/>
        <parameter key="weighted_mean_recall" value="true"/>
        <parameter key="weighted_mean_precision" value="true"/>
        <parameter key="spearman_rho" value="false"/>
        <parameter key="kendall_tau" value="false"/>
        <parameter key="absolute_error" value="false"/>
        <parameter key="relative_error" value="false"/>
        <parameter key="relative_error_lenient" value="false"/>
        <parameter key="relative_error_strict" value="false"/>
        <parameter key="normalized_absolute_error" value="false"/>
        <parameter key="root_mean_squared_error" value="false"/>
        <parameter key="root_relative_squared_error" value="false"/>
        <parameter key="squared_error" value="false"/>
        <parameter key="correlation" value="false"/>
        <parameter key="squared_correlation" value="false"/>
        <parameter key="cross-entropy" value="false"/>
        <parameter key="margin" value="false"/>
        <parameter key="soft_margin_loss" value="false"/>
        <parameter key="logistic_loss" value="true"/>
        <parameter key="skip_undefined_labels" value="true"/>
        <parameter key="use_example_weights" value="true"/>
        <list key="class_weights"/>
      </operator>
      <connect from_port="model" to_op="Apply Model (4)" to_port="model"/>
      <connect from_port="test set" to_op="Apply Model (4)" to_port="unlabelled data"/>
      <connect from_op="Apply Model (4)" from_port="labelled data" to_op="Performance training (2)" to_port="labelled data"/>
      <connect from_op="Performance training (2)" from_port="performance" to_port="performance 1"/>
      <portSpacing port="source_model" spacing="0"/>
      <portSpacing port="source_test set" spacing="0"/>
      <portSpacing port="source_through 1" spacing="0"/>
      <portSpacing port="sink_test set results" spacing="0"/>
      <portSpacing port="sink_performance 1" spacing="0"/>
      <portSpacing port="sink_performance 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- 2-fold cross validation. Training side: k-NN with k=1 (Naive Bayes and GLM are present
       but deactivated). Testing side: apply the model and measure classification performance. -->
  <operator activated="true" class="concurrency:cross_validation" compatibility="8.1.003" expanded="true" height="145" name="Cross Validation" width="90" x="1184" y="136">
    <parameter key="split_on_batch_attribute" value="false"/>
    <parameter key="leave_one_out" value="false"/>
    <parameter key="number_of_folds" value="2"/>
    <parameter key="sampling_type" value="automatic"/>
    <parameter key="use_local_random_seed" value="false"/>
    <parameter key="local_random_seed" value="1992"/>
    <parameter key="enable_parallel_execution" value="true"/>
    <!-- Training subprocess: only k-NN is connected to the model output. -->
    <process expanded="true">
      <operator activated="true" class="k_nn" compatibility="8.1.003" expanded="true" height="82" name="k-NN" width="90" x="179" y="34">
        <parameter key="k" value="1"/>
        <parameter key="weighted_vote" value="false"/>
        <parameter key="measure_types" value="MixedMeasures"/>
        <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
        <parameter key="nominal_measure" value="NominalDistance"/>
        <parameter key="numerical_measure" value="EuclideanDistance"/>
        <parameter key="divergence" value="GeneralizedIDivergence"/>
        <parameter key="kernel_type" value="radial"/>
        <parameter key="kernel_gamma" value="1.0"/>
        <parameter key="kernel_sigma1" value="1.0"/>
        <parameter key="kernel_sigma2" value="0.0"/>
        <parameter key="kernel_sigma3" value="2.0"/>
        <parameter key="kernel_degree" value="3.0"/>
        <parameter key="kernel_shift" value="1.0"/>
        <parameter key="kernel_a" value="1.0"/>
        <parameter key="kernel_b" value="0.0"/>
      </operator>
      <operator activated="false" class="naive_bayes" compatibility="8.1.003" expanded="true" height="82" name="Naive Bayes" width="90" x="179" y="238">
        <parameter key="laplace_correction" value="true"/>
      </operator>
      <operator activated="false" class="h2o:generalized_linear_model" compatibility="7.6.001" expanded="true" height="124" name="Generalized Linear Model" width="90" x="179" y="340">
        <parameter key="family" value="AUTO"/>
        <parameter key="link" value="family_default"/>
        <parameter key="solver" value="AUTO"/>
        <parameter key="reproducible" value="false"/>
        <parameter key="maximum_number_of_threads" value="4"/>
        <parameter key="use_regularization" value="true"/>
        <parameter key="lambda_search" value="false"/>
        <parameter key="number_of_lambdas" value="0"/>
        <parameter key="lambda_min_ratio" value="0.0"/>
        <parameter key="early_stopping" value="true"/>
        <parameter key="stopping_rounds" value="3"/>
        <parameter key="stopping_tolerance" value="0.001"/>
        <parameter key="standardize" value="true"/>
        <parameter key="non-negative_coefficients" value="false"/>
        <parameter key="add_intercept" value="true"/>
        <parameter key="compute_p-values" value="false"/>
        <parameter key="remove_collinear_columns" value="false"/>
        <parameter key="missing_values_handling" value="MeanImputation"/>
        <parameter key="max_iterations" value="0"/>
        <parameter key="specify_beta_constraints" value="false"/>
        <list key="beta_constraints"/>
        <parameter key="max_runtime_seconds" value="0"/>
        <list key="expert_parameters"/>
      </operator>
      <connect from_port="training set" to_op="k-NN" to_port="training set"/>
      <connect from_op="k-NN" from_port="model" to_port="model"/>
      <portSpacing port="source_training set" spacing="0"/>
      <portSpacing port="sink_model" spacing="0"/>
      <portSpacing port="sink_through 1" spacing="0"/>
    </process>
    <!-- Testing subprocess: Apply Model feeds Performance, whose result goes to "performance 1". -->
    <process expanded="true">
      <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model" width="90" x="112" y="34">
        <list key="application_parameters"/>
        <parameter key="create_view" value="false"/>
      </operator>
      <operator activated="true" class="performance_classification" compatibility="8.1.003" expanded="true" height="82" name="Performance training" width="90" x="246" y="34">
        <parameter key="main_criterion" value="accuracy"/>
        <parameter key="accuracy" value="true"/>
        <parameter key="classification_error" value="true"/>
        <parameter key="kappa" value="false"/>
        <parameter key="weighted_mean_recall" value="true"/>
        <parameter key="weighted_mean_precision" value="true"/>
        <parameter key="spearman_rho" value="false"/>
        <parameter key="kendall_tau" value="false"/>
        <parameter key="absolute_error" value="false"/>
        <parameter key="relative_error" value="false"/>
        <parameter key="relative_error_lenient" value="false"/>
        <parameter key="relative_error_strict" value="false"/>
        <parameter key="normalized_absolute_error" value="false"/>
        <parameter key="root_mean_squared_error" value="false"/>
        <parameter key="root_relative_squared_error" value="false"/>
        <parameter key="squared_error" value="false"/>
        <parameter key="correlation" value="false"/>
        <parameter key="squared_correlation" value="false"/>
        <parameter key="cross-entropy" value="false"/>
        <parameter key="margin" value="false"/>
        <parameter key="soft_margin_loss" value="false"/>
        <parameter key="logistic_loss" value="true"/>
        <parameter key="skip_undefined_labels" value="true"/>
        <parameter key="use_example_weights" value="true"/>
        <list key="class_weights"/>
      </operator>
      <connect from_port="model" to_op="Apply Model" to_port="model"/>
      <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Apply Model" from_port="labelled data" to_op="Performance training" to_port="labelled data"/>
      <connect from_op="Performance training" from_port="performance" to_port="performance 1"/>
      <portSpacing port="source_model" spacing="0"/>
      <portSpacing port="source_test set" spacing="0"/>
      <portSpacing port="source_through 1" spacing="0"/>
      <portSpacing port="sink_test set results" spacing="0"/>
      <portSpacing port="sink_performance 1" spacing="0"/>
      <portSpacing port="sink_performance 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Group Models operator with no parameters set; its input connections are not part of this snippet. -->
  <operator activated="true" class="group_models" compatibility="8.1.003" expanded="true" height="103" name="Group Models" width="90" x="1385" y="187"/>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Builds a TF-IDF word vector from the text attributes, pruning terms by percentage
       (below 1.0, above 30.0). The original text is not kept.
       NOTE(review): this operator creates its own word vector; confirm whether it should
       reuse the word list of the training branch so both share the same attributes. -->
  <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (3)" width="90" x="916" y="493">
    <parameter key="create_word_vector" value="true"/>
    <parameter key="vector_creation" value="TF-IDF"/>
    <parameter key="add_meta_information" value="true"/>
    <parameter key="keep_text" value="false"/>
    <parameter key="prune_method" value="percentual"/>
    <parameter key="prune_below_percent" value="1.0"/>
    <parameter key="prune_above_percent" value="30.0"/>
    <parameter key="prune_below_rank" value="0.05"/>
    <parameter key="prune_above_rank" value="0.95"/>
    <parameter key="datamanagement" value="double_sparse_array"/>
    <parameter key="data_management" value="auto"/>
    <parameter key="select_attributes_and_weights" value="false"/>
    <list key="specify_weights"/>
    <!-- Per-document pipeline: tokenize, stem (Porter), filter English stopwords,
         lower-case, then keep tokens of 2 to 15 characters. -->
    <process expanded="true">
      <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize (3)" width="90" x="179" y="34">
        <parameter key="mode" value="non letters"/>
        <parameter key="characters" value=".:"/>
        <parameter key="language" value="English"/>
        <parameter key="max_token_length" value="3"/>
      </operator>
      <operator activated="true" class="text:stem_porter" compatibility="8.1.000" expanded="true" height="68" name="Stem (3)" width="90" x="380" y="34"/>
      <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (3)" width="90" x="581" y="34"/>
      <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases (3)" width="90" x="715" y="34">
        <parameter key="transform_to" value="lower case"/>
      </operator>
      <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (3)" width="90" x="849" y="34">
        <parameter key="min_chars" value="2"/>
        <parameter key="max_chars" value="15"/>
      </operator>
      <connect from_port="document" to_op="Tokenize (3)" to_port="document"/>
      <connect from_op="Tokenize (3)" from_port="document" to_op="Stem (3)" to_port="document"/>
      <connect from_op="Stem (3)" from_port="document" to_op="Filter Stopwords (3)" to_port="document"/>
      <connect from_op="Filter Stopwords (3)" from_port="document" to_op="Transform Cases (3)" to_port="document"/>
      <connect from_op="Transform Cases (3)" from_port="document" to_op="Filter Tokens (3)" to_port="document"/>
      <connect from_op="Filter Tokens (3)" from_port="document" to_port="document 1"/>
      <portSpacing port="source_document" spacing="0"/>
      <portSpacing port="sink_document 1" spacing="0"/>
      <portSpacing port="sink_document 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Singular value decomposition reducing the input to a fixed number of 100 dimensions. -->
  <operator activated="true" class="singular_value_decomposition" compatibility="8.1.003" expanded="true" height="103" name="SVD (3)" width="90" x="1050" y="493">
    <parameter key="dimensionality_reduction" value="fixed number"/>
    <parameter key="percentage_threshold" value="0.95"/>
    <parameter key="dimensions" value="100"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Apply Model with no application parameters and create_view disabled. -->
  <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (6)" width="90" x="1184" y="493">
    <list key="application_parameters"/>
    <parameter key="create_view" value="false"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Apply Model with no application parameters and create_view disabled. -->
  <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (2)" width="90" x="1318" y="493">
    <list key="application_parameters"/>
    <parameter key="create_view" value="false"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Apply Model with no application parameters and create_view disabled. -->
  <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (5)" width="90" x="1452" y="493">
    <list key="application_parameters"/>
    <parameter key="create_view" value="false"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Apply Model with no application parameters and create_view disabled. -->
  <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (3)" width="90" x="1586" y="493">
    <list key="application_parameters"/>
    <parameter key="create_view" value="false"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.003">
  <!-- Classification performance on the test branch. Enabled criteria: accuracy,
       classification_error, weighted_mean_recall, weighted_mean_precision, logistic_loss.
       main_criterion is "first". -->
  <operator activated="true" class="performance_classification" compatibility="8.1.003" expanded="true" height="82" name="Test Performance" width="90" x="1586" y="340">
    <parameter key="main_criterion" value="first"/>
    <parameter key="accuracy" value="true"/>
    <parameter key="classification_error" value="true"/>
    <parameter key="kappa" value="false"/>
    <parameter key="weighted_mean_recall" value="true"/>
    <parameter key="weighted_mean_precision" value="true"/>
    <parameter key="spearman_rho" value="false"/>
    <parameter key="kendall_tau" value="false"/>
    <parameter key="absolute_error" value="false"/>
    <parameter key="relative_error" value="false"/>
    <parameter key="relative_error_lenient" value="false"/>
    <parameter key="relative_error_strict" value="false"/>
    <parameter key="normalized_absolute_error" value="false"/>
    <parameter key="root_mean_squared_error" value="false"/>
    <parameter key="root_relative_squared_error" value="false"/>
    <parameter key="squared_error" value="false"/>
    <parameter key="correlation" value="false"/>
    <parameter key="squared_correlation" value="false"/>
    <parameter key="cross-entropy" value="false"/>
    <parameter key="margin" value="false"/>
    <parameter key="soft_margin_loss" value="false"/>
    <parameter key="logistic_loss" value="true"/>
    <parameter key="skip_undefined_labels" value="true"/>
    <parameter key="use_example_weights" value="true"/>
    <list key="class_weights"/>
  </operator>
</process>

 

 

Tagged:

Answers

  • lionelderkrikor
    lionelderkrikor New Altair Community Member

    Hi @me1234,

     

    Your XML process is broken (it cannot be imported into RapidMiner).

    To share a process properly, please follow the instructions at this link.

     

    Regards,

     

    Lionel

  • me1234
    me1234 New Altair Community Member

    Thanks for your response. That is quite strange, because I did it as shown in the video. The .rmp file is attached.

     

  • lionelderkrikor
    lionelderkrikor New Altair Community Member

    Hi @me1234,

     

    I inserted SVD, Group Models and Apply Model operators inside the Cross Validation operators.

    Now it works.

    Does this process meet your needs?

    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
    <process expanded="true">
    <operator activated="true" class="read_csv" compatibility="8.1.000" expanded="true" height="68" name="Read Health&amp;PC" width="90" x="514" y="136">
    <parameter key="csv_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\SVD operator\amahpc-right-10k_2.csv"/>
    <parameter key="first_row_as_names" value="false"/>
    <list key="annotations">
    <parameter key="0" value="Name"/>
    </list>
    <parameter key="encoding" value="windows-1252"/>
    <list key="data_set_meta_data_information">
    <parameter key="0" value="att1.true.real.attribute"/>
    <parameter key="1" value="asin.true.polynominal.attribute"/>
    <parameter key="2" value="reviewerName.true.polynominal.attribute"/>
    <parameter key="3" value="helpful.true.real.attribute"/>
    <parameter key="4" value="reviewText.true.polynominal.attribute"/>
    <parameter key="5" value="overall.true.nominal.attribute"/>
    <parameter key="6" value="summary.true.polynominal.attribute"/>
    <parameter key="7" value="unixReviewTime.true.real.attribute"/>
    <parameter key="8" value="reviewTime.true.polynominal.attribute"/>
    <parameter key="9" value="X1.true.real.attribute"/>
    <parameter key="10" value="X2.true.real.attribute"/>
    </list>
    </operator>
    <operator activated="true" class="subprocess" compatibility="8.1.003" expanded="true" height="82" name="Pre-processing" width="90" x="648" y="136">
    <process expanded="true">
    <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 1.0 &gt; negative" width="90" x="179" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="overall"/>
    <parameter key="replace_what" value="1.0"/>
    <parameter key="replace_by" value="negative"/>
    </operator>
    <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 2.0 &gt; negative" width="90" x="313" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="overall"/>
    <parameter key="replace_what" value="2.0"/>
    <parameter key="replace_by" value="negative"/>
    </operator>
    <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 3.0 &gt; neutral" width="90" x="447" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="overall"/>
    <parameter key="replace_what" value="3.0"/>
    <parameter key="replace_by" value="neutral"/>
    </operator>
    <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 4.0 &gt; positive" width="90" x="581" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="overall"/>
    <parameter key="replace_what" value="4.0"/>
    <parameter key="replace_by" value="positive"/>
    </operator>
    <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 5.0 &gt; positive" width="90" x="715" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="overall"/>
    <parameter key="replace_what" value="5.0"/>
    <parameter key="replace_by" value="positive"/>
    </operator>
    <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes" width="90" x="380" y="136">
    <parameter key="attribute_filter_type" value="subset"/>
    <parameter key="attributes" value="reviewText|overall"/>
    </operator>
    <operator activated="true" class="declare_missing_value" compatibility="8.1.003" expanded="true" height="82" name="Declare Missing Value" width="90" x="514" y="136">
    <parameter key="mode" value="nominal"/>
    <parameter key="nominal_value" value="?"/>
    </operator>
    <operator activated="true" class="filter_examples" compatibility="8.1.003" expanded="true" height="103" name="Filter Examples" width="90" x="648" y="136">
    <parameter key="invert_filter" value="true"/>
    <list key="filters_list">
    <parameter key="filters_entry_key" value="overall.is_missing."/>
    <parameter key="filters_entry_key" value="reviewText.is_missing."/>
    </list>
    </operator>
    <operator activated="true" class="set_role" compatibility="8.1.003" expanded="true" height="82" name="Set Role" width="90" x="782" y="136">
    <parameter key="attribute_name" value="overall"/>
    <parameter key="target_role" value="label"/>
    <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="nominal_to_text" compatibility="8.1.003" expanded="true" height="82" name="Nominal to Text" width="90" x="916" y="136">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="reviewText"/>
    </operator>
    <connect from_port="in 1" to_op="Replace 1.0 &gt; negative" to_port="example set input"/>
    <connect from_op="Replace 1.0 &gt; negative" from_port="example set output" to_op="Replace 2.0 &gt; negative" to_port="example set input"/>
    <connect from_op="Replace 2.0 &gt; negative" from_port="example set output" to_op="Replace 3.0 &gt; neutral" to_port="example set input"/>
    <connect from_op="Replace 3.0 &gt; neutral" from_port="example set output" to_op="Replace 4.0 &gt; positive" to_port="example set input"/>
    <connect from_op="Replace 4.0 &gt; positive" from_port="example set output" to_op="Replace 5.0 &gt; positive" to_port="example set input"/>
    <connect from_op="Replace 5.0 &gt; positive" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_op="Declare Missing Value" to_port="example set input"/>
    <connect from_op="Declare Missing Value" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
    <connect from_op="Filter Examples" from_port="example set output" to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
    <connect from_op="Nominal to Text" from_port="example set output" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="source_in 2" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    </process>
    </operator>
    <operator activated="true" class="operator_toolbox:smote" compatibility="1.0.000" expanded="true" height="82" name="Smote Upsampling" width="90" x="782" y="85">
    <parameter key="number_of_neighbours" value="2"/>
    </operator>
    <operator activated="true" class="sample" compatibility="8.1.003" expanded="true" height="82" name="Sample" width="90" x="782" y="187">
    <parameter key="sample" value="relative"/>
    <parameter key="balance_data" value="true"/>
    <list key="sample_size_per_class">
    <parameter key="positive" value="1000"/>
    <parameter key="negative" value="1000"/>
    <parameter key="neutral" value="1000"/>
    </list>
    <list key="sample_ratio_per_class">
    <parameter key="negative" value="1.0"/>
    <parameter key="positive" value="0.3"/>
    <parameter key="neutral" value="1.0"/>
    </list>
    <list key="sample_probability_per_class"/>
    </operator>
    <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="916" y="187">
    <parameter key="prune_method" value="percentual"/>
    <parameter key="prune_below_percent" value="1.0"/>
    <list key="specify_weights"/>
    <process expanded="true">
    <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="179" y="34"/>
    <operator activated="true" class="text:stem_porter" compatibility="8.1.000" expanded="true" height="68" name="Stem (Porter)" width="90" x="313" y="34"/>
    <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="447" y="34"/>
    <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="581" y="34"/>
    <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="782" y="34">
    <parameter key="min_chars" value="2"/>
    <parameter key="max_chars" value="15"/>
    </operator>
    <connect from_port="document" to_op="Tokenize" to_port="document"/>
    <connect from_op="Tokenize" from_port="document" to_op="Stem (Porter)" to_port="document"/>
    <connect from_op="Stem (Porter)" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
    <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Transform Cases" to_port="document"/>
    <connect from_op="Transform Cases" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
    <connect from_op="Filter Tokens (by Length)" from_port="document" to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
    </process>
    </operator>
    <!-- Cross validation with 2 folds. The SVD is fitted inside the training subprocess and its
         preprocessing model is handed to the testing subprocess through port "through 1". -->
    <operator activated="true" class="concurrency:cross_validation" compatibility="8.1.003" expanded="true" height="145" name="Cross Validation" width="90" x="1184" y="136">
    <parameter key="number_of_folds" value="2"/>
    <!-- Training subprocess: training set -> SVD (100 dimensions) -> k-NN. -->
    <process expanded="true">
    <!-- Naive Bayes and Generalized Linear Model are deactivated (activated="false") and are not
         referenced by any connection in this subprocess. -->
    <operator activated="false" class="naive_bayes" compatibility="8.1.003" expanded="true" height="82" name="Naive Bayes" width="90" x="179" y="238"/>
    <operator activated="false" class="h2o:generalized_linear_model" compatibility="7.6.001" expanded="true" height="124" name="Generalized Linear Model" width="90" x="179" y="340">
    <list key="beta_constraints"/>
    <list key="expert_parameters"/>
    </operator>
    <operator activated="true" class="singular_value_decomposition" compatibility="8.1.003" expanded="true" height="103" name="SVD" width="90" x="179" y="34">
    <parameter key="dimensions" value="100"/>
    </operator>
    <operator activated="true" class="k_nn" compatibility="8.1.003" expanded="true" height="82" name="k-NN" width="90" x="313" y="34"/>
    <connect from_port="training set" to_op="SVD" to_port="example set input"/>
    <connect from_op="SVD" from_port="example set output" to_op="k-NN" to_port="training set"/>
    <!-- The SVD preprocessing model leaves the training side here so the test fold can be
         transformed with the same model. -->
    <connect from_op="SVD" from_port="preprocessing model" to_port="through 1"/>
    <connect from_op="k-NN" from_port="model" to_port="model"/>
    <portSpacing port="source_training set" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
    <portSpacing port="sink_through 2" spacing="0"/>
    </process>
    <!-- Testing subprocess: Apply Model (8) applies the SVD model from "through 1" to the test
         set, Apply Model then applies the k-NN model, and Performance training scores the result. -->
    <process expanded="true">
    <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (8)" width="90" x="45" y="85">
    <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model" width="90" x="246" y="34">
    <list key="application_parameters"/>
    </operator>
    <!-- Main criterion is accuracy; classification error, weighted mean recall, weighted mean
         precision and logistic loss are enabled as well. -->
    <operator activated="true" class="performance_classification" compatibility="8.1.003" expanded="true" height="82" name="Performance training" width="90" x="380" y="34">
    <parameter key="main_criterion" value="accuracy"/>
    <parameter key="classification_error" value="true"/>
    <parameter key="weighted_mean_recall" value="true"/>
    <parameter key="weighted_mean_precision" value="true"/>
    <parameter key="logistic_loss" value="true"/>
    <list key="class_weights"/>
    </operator>
    <connect from_port="model" to_op="Apply Model" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model (8)" to_port="unlabelled data"/>
    <connect from_port="through 1" to_op="Apply Model (8)" to_port="model"/>
    <connect from_op="Apply Model (8)" from_port="labelled data" to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Apply Model" from_port="labelled data" to_op="Performance training" to_port="labelled data"/>
    <connect from_op="Performance training" from_port="performance" to_port="performance 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="source_through 2" spacing="0"/>
    <portSpacing port="sink_test set results" spacing="0"/>
    <portSpacing port="sink_performance 1" spacing="0"/>
    <portSpacing port="sink_performance 2" spacing="0"/>
    </process>
    </operator>
    <!-- Reads the CSV file named in csv_file using windows-1252 encoding. first_row_as_names is
         false; row 0 is annotated as "Name" instead. The meta data list declares 11 columns
         (indices 0 to 10); "overall" is declared as nominal here. -->
    <operator activated="true" class="read_csv" compatibility="8.1.003" expanded="true" height="68" name="Read Beauty" width="90" x="514" y="289">
    <!-- Absolute local Windows path: must be adapted on any other machine. -->
    <parameter key="csv_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\SVD operator\amabea-right-10k_2.csv"/>
    <parameter key="first_row_as_names" value="false"/>
    <list key="annotations">
    <parameter key="0" value="Name"/>
    </list>
    <parameter key="encoding" value="windows-1252"/>
    <!-- Each entry looks like name.flag.value_type.role; presumably the flag marks the column
         as selected - TODO confirm against the Read CSV documentation. -->
    <list key="data_set_meta_data_information">
    <parameter key="0" value="att1.true.real.attribute"/>
    <parameter key="1" value="reviewerID.true.polynominal.attribute"/>
    <parameter key="2" value="summary.true.polynominal.attribute"/>
    <parameter key="3" value="reviewerName.true.polynominal.attribute"/>
    <parameter key="4" value="unixReviewTime.true.real.attribute"/>
    <parameter key="5" value="reviewText.true.polynominal.attribute"/>
    <parameter key="6" value="overall.true.nominal.attribute"/>
    <parameter key="7" value="helpful.true.real.attribute"/>
    <parameter key="8" value="asin.true.polynominal.attribute"/>
    <parameter key="9" value="X1.true.real.attribute"/>
    <parameter key="10" value="X2.true.real.attribute"/>
    </list>
    </operator>
    <!-- Label preparation subprocess: rewrites the star rating in "overall" to
         negative / neutral / positive, removes rows with a missing label or review text, keeps
         only reviewText and overall, marks overall as the label and converts reviewText to text. -->
    <operator activated="true" class="subprocess" compatibility="8.1.003" expanded="true" height="82" name="Pre-processing (2)" width="90" x="648" y="289">
    <process expanded="true">
    <!-- Rating mapping on attribute "overall": 1.0 and 2.0 become negative, 3.0 becomes neutral,
         4.0 and 5.0 become positive. NOTE(review): replace_what is presumably interpreted as a
         regular expression, so the dot matches any character; harmless for these values. -->
    <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 1.0 &gt; negative (2)" width="90" x="45" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="overall"/>
    <parameter key="replace_what" value="1.0"/>
    <parameter key="replace_by" value="negative"/>
    </operator>
    <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 2.0 &gt; negative (2)" width="90" x="179" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="overall"/>
    <parameter key="replace_what" value="2.0"/>
    <parameter key="replace_by" value="negative"/>
    </operator>
    <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 3.0 &gt; neutral (2)" width="90" x="313" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="overall"/>
    <parameter key="replace_what" value="3.0"/>
    <parameter key="replace_by" value="neutral"/>
    </operator>
    <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 4.0 &gt; positive (2)" width="90" x="447" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="overall"/>
    <parameter key="replace_what" value="4.0"/>
    <parameter key="replace_by" value="positive"/>
    </operator>
    <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 5.0 &gt; positive (2)" width="90" x="581" y="34">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="overall"/>
    <parameter key="replace_what" value="5.0"/>
    <parameter key="replace_by" value="positive"/>
    </operator>
    <!-- Declares the nominal value "?" as a missing value. No attribute filter is set, so the
         operator's default attribute selection applies. -->
    <operator activated="true" class="declare_missing_value" compatibility="8.1.003" expanded="true" height="82" name="Declare Missing Value (2)" width="90" x="246" y="187">
    <parameter key="mode" value="nominal"/>
    <parameter key="nominal_value" value="?"/>
    </operator>
    <!-- Removes every row in which overall OR reviewText is missing. The filter is inverted, so
         the two is_missing conditions have to be combined with OR (filters_logic_and = false):
         NOT (overall missing OR reviewText missing) keeps only rows where both values are
         present. With the default AND logic a row was dropped only when both values were
         missing, which let rows without a label or without text through. -->
    <operator activated="true" class="filter_examples" compatibility="8.1.003" expanded="true" height="103" name="Filter Examples (2)" width="90" x="380" y="187">
    <parameter key="invert_filter" value="true"/>
    <list key="filters_list">
    <parameter key="filters_entry_key" value="overall.is_missing."/>
    <parameter key="filters_entry_key" value="reviewText.is_missing."/>
    </list>
    <parameter key="filters_logic_and" value="false"/>
    </operator>
    <!-- Keeps only the review text and the rating. -->
    <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes (2)" width="90" x="514" y="187">
    <parameter key="attribute_filter_type" value="subset"/>
    <parameter key="attributes" value="reviewText|overall"/>
    </operator>
    <!-- Marks overall as the label. -->
    <operator activated="true" class="set_role" compatibility="8.1.003" expanded="true" height="82" name="Set Role (2)" width="90" x="648" y="187">
    <parameter key="attribute_name" value="overall"/>
    <parameter key="target_role" value="label"/>
    <list key="set_additional_roles"/>
    </operator>
    <!-- Converts reviewText from nominal to text. -->
    <operator activated="true" class="nominal_to_text" compatibility="8.1.003" expanded="true" height="82" name="Nominal to Text (2)" width="90" x="782" y="187">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="reviewText"/>
    </operator>
    <!-- Linear chain from "in 1" to "out 1" in the order the operators are listed above. -->
    <connect from_port="in 1" to_op="Replace 1.0 &gt; negative (2)" to_port="example set input"/>
    <connect from_op="Replace 1.0 &gt; negative (2)" from_port="example set output" to_op="Replace 2.0 &gt; negative (2)" to_port="example set input"/>
    <connect from_op="Replace 2.0 &gt; negative (2)" from_port="example set output" to_op="Replace 3.0 &gt; neutral (2)" to_port="example set input"/>
    <connect from_op="Replace 3.0 &gt; neutral (2)" from_port="example set output" to_op="Replace 4.0 &gt; positive (2)" to_port="example set input"/>
    <connect from_op="Replace 4.0 &gt; positive (2)" from_port="example set output" to_op="Replace 5.0 &gt; positive (2)" to_port="example set input"/>
    <connect from_op="Replace 5.0 &gt; positive (2)" from_port="example set output" to_op="Declare Missing Value (2)" to_port="example set input"/>
    <connect from_op="Declare Missing Value (2)" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
    <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
    <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
    <connect from_op="Set Role (2)" from_port="example set output" to_op="Nominal to Text (2)" to_port="example set input"/>
    <connect from_op="Nominal to Text (2)" from_port="example set output" to_port="out 1"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="source_in 2" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    </process>
    </operator>
    <!-- Splits the incoming example set into two partitions with ratios 0.7 and 0.3. In the
         top-level wiring, partition 1 goes to Smote Upsampling (2) and partition 2 goes to
         Process Documents from Data (3). -->
    <operator activated="true" class="split_data" compatibility="8.1.003" expanded="true" height="103" name="Split Data" width="90" x="648" y="442">
    <enumeration key="partitions">
    <parameter key="ratio" value="0.7"/>
    <parameter key="ratio" value="0.3"/>
    </enumeration>
    </operator>
    <!-- SMOTE upsampling operator (class operator_toolbox:smote) configured with 2 neighbours.
         It receives partition 1 of Split Data in the top-level wiring. -->
    <operator activated="true" class="operator_toolbox:smote" compatibility="1.0.000" expanded="true" height="82" name="Smote Upsampling (2)" width="90" x="782" y="289">
    <parameter key="number_of_neighbours" value="2"/>
    </operator>
    <!-- Class-balanced relative sampling (sample = relative, balance_data = true). The ratio list
         keeps negative and neutral at 1.0 and positive at 0.3. -->
    <operator activated="true" class="sample" compatibility="8.1.003" expanded="true" height="82" name="Sample (2)" width="90" x="782" y="391">
    <parameter key="sample" value="relative"/>
    <parameter key="balance_data" value="true"/>
    <!-- NOTE(review): presumably this absolute size list is ignored while sample = relative and
         only sample_ratio_per_class takes effect - confirm against the Sample documentation. -->
    <list key="sample_size_per_class">
    <parameter key="positive" value="1000"/>
    <parameter key="negative" value="1000"/>
    <parameter key="neutral" value="1000"/>
    </list>
    <list key="sample_ratio_per_class">
    <parameter key="negative" value="1.0"/>
    <parameter key="positive" value="0.3"/>
    <parameter key="neutral" value="1.0"/>
    </list>
    <list key="sample_probability_per_class"/>
    </operator>
    <!-- Text vectorization with percentual pruning (prune_below_percent = 1.0). Its word list
         input is fed from Process Documents from Data in the top-level wiring, and its own word
         list output is passed on to Process Documents from Data (3). -->
    <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (2)" width="90" x="916" y="340">
    <parameter key="prune_method" value="percentual"/>
    <parameter key="prune_below_percent" value="1.0"/>
    <list key="specify_weights"/>
    <!-- Per-document chain: tokenize, Porter stemming, English stop word filter, case
         transformation, token length filter (2 to 15 characters).
         NOTE(review): case transformation runs after stemming and stop word filtering; if those
         operators are case sensitive, capitalized stop words survive - confirm the ordering. -->
    <process expanded="true">
    <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="179" y="34"/>
    <operator activated="true" class="text:stem_porter" compatibility="8.1.000" expanded="true" height="68" name="Stem (2)" width="90" x="380" y="34"/>
    <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (2)" width="90" x="581" y="34"/>
    <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases (2)" width="90" x="715" y="34"/>
    <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (2)" width="90" x="849" y="34">
    <parameter key="min_chars" value="2"/>
    <parameter key="max_chars" value="15"/>
    </operator>
    <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
    <connect from_op="Tokenize (2)" from_port="document" to_op="Stem (2)" to_port="document"/>
    <connect from_op="Stem (2)" from_port="document" to_op="Filter Stopwords (2)" to_port="document"/>
    <connect from_op="Filter Stopwords (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
    <connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Tokens (2)" to_port="document"/>
    <connect from_op="Filter Tokens (2)" from_port="document" to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
    </process>
    </operator>
    <!-- Cross validation with 2 folds on the output of Process Documents from Data (2). The SVD
         is fitted per training fold and its preprocessing model is passed to the testing
         subprocess through "through 1". -->
    <operator activated="true" class="concurrency:cross_validation" compatibility="8.1.003" expanded="true" height="145" name="Cross Validation (2)" width="90" x="1184" y="289">
    <parameter key="number_of_folds" value="2"/>
    <!-- Training subprocess: training set -> SVD (2) with 100 dimensions -> k-NN (2). -->
    <process expanded="true">
    <operator activated="true" class="singular_value_decomposition" compatibility="8.1.003" expanded="true" height="103" name="SVD (2)" width="90" x="45" y="34">
    <parameter key="dimensions" value="100"/>
    </operator>
    <operator activated="true" class="k_nn" compatibility="8.1.003" expanded="true" height="82" name="k-NN (2)" width="90" x="179" y="34"/>
    <connect from_port="training set" to_op="SVD (2)" to_port="example set input"/>
    <connect from_op="SVD (2)" from_port="example set output" to_op="k-NN (2)" to_port="training set"/>
    <!-- Forward the SVD preprocessing model to the testing side. -->
    <connect from_op="SVD (2)" from_port="preprocessing model" to_port="through 1"/>
    <connect from_op="k-NN (2)" from_port="model" to_port="model"/>
    <portSpacing port="source_training set" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
    <portSpacing port="sink_through 2" spacing="0"/>
    </process>
    <!-- Testing subprocess: Apply Model (9) applies the SVD model to the test set, Apply Model (4)
         applies the k-NN model, Performance training (2) scores the predictions. -->
    <process expanded="true">
    <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (9)" width="90" x="45" y="85">
    <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (4)" width="90" x="246" y="34">
    <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="performance_classification" compatibility="8.1.003" expanded="true" height="82" name="Performance training (2)" width="90" x="380" y="34">
    <parameter key="main_criterion" value="accuracy"/>
    <parameter key="classification_error" value="true"/>
    <parameter key="weighted_mean_recall" value="true"/>
    <parameter key="weighted_mean_precision" value="true"/>
    <parameter key="logistic_loss" value="true"/>
    <list key="class_weights"/>
    </operator>
    <connect from_port="model" to_op="Apply Model (4)" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model (9)" to_port="unlabelled data"/>
    <connect from_port="through 1" to_op="Apply Model (9)" to_port="model"/>
    <connect from_op="Apply Model (9)" from_port="labelled data" to_op="Apply Model (4)" to_port="unlabelled data"/>
    <connect from_op="Apply Model (4)" from_port="labelled data" to_op="Performance training (2)" to_port="labelled data"/>
    <connect from_op="Performance training (2)" from_port="performance" to_port="performance 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="source_through 2" spacing="0"/>
    <portSpacing port="sink_test set results" spacing="0"/>
    <portSpacing port="sink_performance 1" spacing="0"/>
    <portSpacing port="sink_performance 2" spacing="0"/>
    </process>
    </operator>
    <!-- Text vectorization for partition 2 of Split Data, with percentual pruning
         (prune_below_percent = 1.0). Its word list input comes from Process Documents from
         Data (2) in the top-level wiring. -->
    <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (3)" width="90" x="916" y="493">
    <parameter key="prune_method" value="percentual"/>
    <parameter key="prune_below_percent" value="1.0"/>
    <list key="specify_weights"/>
    <!-- Per-document chain, same operator sequence as in Process Documents from Data (2):
         tokenize, Porter stemming, English stop word filter, case transformation, token length
         filter (2 to 15 characters). -->
    <process expanded="true">
    <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize (3)" width="90" x="179" y="34"/>
    <operator activated="true" class="text:stem_porter" compatibility="8.1.000" expanded="true" height="68" name="Stem (3)" width="90" x="380" y="34"/>
    <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (3)" width="90" x="581" y="34"/>
    <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases (3)" width="90" x="715" y="34"/>
    <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (3)" width="90" x="849" y="34">
    <parameter key="min_chars" value="2"/>
    <parameter key="max_chars" value="15"/>
    </operator>
    <connect from_port="document" to_op="Tokenize (3)" to_port="document"/>
    <connect from_op="Tokenize (3)" from_port="document" to_op="Stem (3)" to_port="document"/>
    <connect from_op="Stem (3)" from_port="document" to_op="Filter Stopwords (3)" to_port="document"/>
    <connect from_op="Filter Stopwords (3)" from_port="document" to_op="Transform Cases (3)" to_port="document"/>
    <connect from_op="Transform Cases (3)" from_port="document" to_op="Filter Tokens (3)" to_port="document"/>
    <connect from_op="Filter Tokens (3)" from_port="document" to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
    </process>
    </operator>
    <!-- Cross validation on the output of Process Documents from Data (3). No number_of_folds
         parameter is set here, so the operator default applies (the other two cross validations
         in this process set it to 2). -->
    <operator activated="true" class="concurrency:cross_validation" compatibility="8.1.003" expanded="true" height="145" name="Cross Validation (3)" width="90" x="1184" y="493">
    <!-- Training subprocess: SVD (5) with 100 dimensions, then Multiply copies the reduced set to
         k-NN (3) and Naive Bayes (2); both models are combined by Group Models. -->
    <process expanded="true">
    <operator activated="true" class="singular_value_decomposition" compatibility="8.1.003" expanded="true" height="103" name="SVD (5)" width="90" x="45" y="238">
    <parameter key="dimensions" value="100"/>
    </operator>
    <operator activated="true" class="multiply" compatibility="8.1.003" expanded="true" height="103" name="Multiply" width="90" x="112" y="34"/>
    <operator activated="true" class="naive_bayes" compatibility="8.1.003" expanded="true" height="82" name="Naive Bayes (2)" width="90" x="246" y="136"/>
    <operator activated="true" class="k_nn" compatibility="8.1.003" expanded="true" height="82" name="k-NN (3)" width="90" x="246" y="34"/>
    <!-- NOTE(review): two classifiers are grouped (k-NN first, Naive Bayes second); confirm which
         prediction the performance operator on the testing side ends up scoring. -->
    <operator activated="true" class="group_models" compatibility="8.1.003" expanded="true" height="103" name="Group Models" width="90" x="380" y="34"/>
    <connect from_port="training set" to_op="SVD (5)" to_port="example set input"/>
    <connect from_op="SVD (5)" from_port="example set output" to_op="Multiply" to_port="input"/>
    <!-- Forward the SVD preprocessing model to the testing side. -->
    <connect from_op="SVD (5)" from_port="preprocessing model" to_port="through 1"/>
    <connect from_op="Multiply" from_port="output 1" to_op="k-NN (3)" to_port="training set"/>
    <connect from_op="Multiply" from_port="output 2" to_op="Naive Bayes (2)" to_port="training set"/>
    <connect from_op="Naive Bayes (2)" from_port="model" to_op="Group Models" to_port="models in 2"/>
    <connect from_op="k-NN (3)" from_port="model" to_op="Group Models" to_port="models in 1"/>
    <connect from_op="Group Models" from_port="model out" to_port="model"/>
    <portSpacing port="source_training set" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
    <portSpacing port="sink_through 2" spacing="0"/>
    </process>
    <!-- Testing subprocess: Apply Model (3) applies the SVD model from "through 1" to the test
         set, Apply Model (7) applies the grouped model, Test Performance (2) scores the result. -->
    <process expanded="true">
    <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (3)" width="90" x="112" y="34">
    <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (7)" width="90" x="246" y="34">
    <list key="application_parameters"/>
    </operator>
    <!-- No main_criterion is set here, unlike the two "Performance training" operators. -->
    <operator activated="true" class="performance_classification" compatibility="8.1.003" expanded="true" height="82" name="Test Performance (2)" width="90" x="380" y="34">
    <parameter key="classification_error" value="true"/>
    <parameter key="weighted_mean_recall" value="true"/>
    <parameter key="weighted_mean_precision" value="true"/>
    <parameter key="logistic_loss" value="true"/>
    <list key="class_weights"/>
    </operator>
    <connect from_port="model" to_op="Apply Model (7)" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model (3)" to_port="unlabelled data"/>
    <connect from_port="through 1" to_op="Apply Model (3)" to_port="model"/>
    <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Apply Model (7)" to_port="unlabelled data"/>
    <connect from_op="Apply Model (7)" from_port="labelled data" to_op="Test Performance (2)" to_port="labelled data"/>
    <connect from_op="Test Performance (2)" from_port="performance" to_port="performance 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="source_through 2" spacing="0"/>
    <portSpacing port="sink_test set results" spacing="0"/>
    <portSpacing port="sink_performance 1" spacing="0"/>
    <portSpacing port="sink_performance 2" spacing="0"/>
    </process>
    </operator>
    <!-- First chain: Read Health&PC -> Pre-processing -> Smote Upsampling -> Sample ->
         Process Documents from Data -> Cross Validation. Its word list is also handed to
         Process Documents from Data (2). -->
    <connect from_op="Read Health&amp;PC" from_port="output" to_op="Pre-processing" to_port="in 1"/>
    <connect from_op="Pre-processing" from_port="out 1" to_op="Smote Upsampling" to_port="exa"/>
    <connect from_op="Smote Upsampling" from_port="ups" to_op="Sample" to_port="example set input"/>
    <connect from_op="Sample" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
    <connect from_op="Process Documents from Data" from_port="example set" to_op="Cross Validation" to_port="example set"/>
    <connect from_op="Process Documents from Data" from_port="word list" to_op="Process Documents from Data (2)" to_port="word list"/>
    <connect from_op="Cross Validation" from_port="performance 1" to_port="result 1"/>
    <!-- Second chain: Read Beauty -> Pre-processing (2) -> Split Data. Partition 1 goes through
         Smote Upsampling (2), Sample (2) and Process Documents from Data (2) into
         Cross Validation (2); partition 2 goes through Process Documents from Data (3) into
         Cross Validation (3). The word list of (2) is reused by (3). -->
    <connect from_op="Read Beauty" from_port="output" to_op="Pre-processing (2)" to_port="in 1"/>
    <connect from_op="Pre-processing (2)" from_port="out 1" to_op="Split Data" to_port="example set"/>
    <connect from_op="Split Data" from_port="partition 1" to_op="Smote Upsampling (2)" to_port="exa"/>
    <connect from_op="Split Data" from_port="partition 2" to_op="Process Documents from Data (3)" to_port="example set"/>
    <connect from_op="Smote Upsampling (2)" from_port="ups" to_op="Sample (2)" to_port="example set input"/>
    <connect from_op="Sample (2)" from_port="example set output" to_op="Process Documents from Data (2)" to_port="example set"/>
    <connect from_op="Process Documents from Data (2)" from_port="example set" to_op="Cross Validation (2)" to_port="example set"/>
    <connect from_op="Process Documents from Data (2)" from_port="word list" to_op="Process Documents from Data (3)" to_port="word list"/>
    <connect from_op="Cross Validation (2)" from_port="performance 1" to_port="result 2"/>
    <connect from_op="Process Documents from Data (3)" from_port="example set" to_op="Cross Validation (3)" to_port="example set"/>
    <!-- Cross Validation (3) delivers both its model (result 3) and its performance (result 4). -->
    <connect from_op="Cross Validation (3)" from_port="model" to_port="result 3"/>
    <connect from_op="Cross Validation (3)" from_port="performance 1" to_port="result 4"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
    <portSpacing port="sink_result 3" spacing="0"/>
    <portSpacing port="sink_result 4" spacing="0"/>
    <portSpacing port="sink_result 5" spacing="0"/>
    <description align="center" color="yellow" colored="false" height="212" resized="true" width="290" x="466" y="30">Step 1 - Load &amp;amp; Step 2 Prepare Data (Source)</description>
    <description align="center" color="green" colored="true" height="581" resized="true" width="125" x="761" y="29">Step 4 - Sample</description>
    <description align="left" color="yellow" colored="false" height="579" resized="true" width="138" x="893" y="29">Step 5 - Process documents from Data&lt;br&gt;</description>
    <description align="center" color="purple" colored="true" height="576" resized="true" width="124" x="1036" y="29">Step 6 - Latent Semantic Indexing with Topic Modelling</description>
    <description align="left" color="green" colored="true" height="270" resized="true" width="527" x="1164" y="28">Step 7 Optimizing &amp;amp; Training&lt;br&gt;kNN&lt;br&gt;Naives Bayes&lt;br&gt;GLM&lt;br&gt;</description>
    <description align="center" color="orange" colored="true" height="364" resized="true" width="291" x="466" y="247">Step 3 - Load &amp;amp; Step 2 - Prepare Data (Target)</description>
    <description align="right" color="purple" colored="true" height="193" resized="true" width="144" x="610" y="411">Step 3 - TL Split</description>
    </process>
    </operator>
    </process>

     

    Regards,

     

    Lionel 

  • me1234
    me1234 New Altair Community Member
    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
    <!-- Standalone process snippet containing only the Read Health&PC operator with its full
         parameter list: semicolon-separated CSV, double-quote quoting, backslash escape,
         windows-1252 encoding, first row not used as names (row 0 annotated as "Name"). -->
    <operator activated="true" class="read_csv" compatibility="8.1.000" expanded="true" height="68" name="Read Health&amp;PC" width="90" x="514" y="136">
    <!-- Absolute local Windows path: must be adapted on any other machine. -->
    <parameter key="csv_file" value="F:\CloudStation\Studie Mikael\VAF\DATA\amahpc-right-10k.csv"/>
    <parameter key="column_separators" value=";"/>
    <parameter key="trim_lines" value="false"/>
    <parameter key="use_quotes" value="true"/>
    <parameter key="quotes_character" value="&quot;"/>
    <parameter key="escape_character" value="\"/>
    <parameter key="skip_comments" value="false"/>
    <parameter key="comment_characters" value="#"/>
    <parameter key="parse_numbers" value="true"/>
    <parameter key="decimal_character" value="."/>
    <parameter key="grouped_digits" value="false"/>
    <parameter key="grouping_character" value=","/>
    <parameter key="date_format" value=""/>
    <parameter key="first_row_as_names" value="false"/>
    <list key="annotations">
    <parameter key="0" value="Name"/>
    </list>
    <parameter key="time_zone" value="SYSTEM"/>
    <parameter key="locale" value="English (United States)"/>
    <parameter key="encoding" value="windows-1252"/>
    <parameter key="read_all_values_as_polynominal" value="false"/>
    <!-- Declares 11 columns (indices 0 to 10); "overall" is declared as polynominal. -->
    <list key="data_set_meta_data_information">
    <parameter key="0" value="att1.true.real.attribute"/>
    <parameter key="1" value="asin.true.polynominal.attribute"/>
    <parameter key="2" value="reviewerName.true.polynominal.attribute"/>
    <parameter key="3" value="helpful.true.real.attribute"/>
    <parameter key="4" value="reviewText.true.polynominal.attribute"/>
    <parameter key="5" value="overall.true.polynominal.attribute"/>
    <parameter key="6" value="summary.true.polynominal.attribute"/>
    <parameter key="7" value="unixReviewTime.true.real.attribute"/>
    <parameter key="8" value="reviewTime.true.polynominal.attribute"/>
    <parameter key="9" value="X1.true.real.attribute"/>
    <parameter key="10" value="X2.true.real.attribute"/>
    </list>
    <parameter key="read_not_matching_values_as_missings" value="true"/>
    <!-- Both key spellings (datamanagement and data_management) are present in this export. -->
    <parameter key="datamanagement" value="double_array"/>
    <parameter key="data_management" value="auto"/>
    </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
    <!-- Standalone process snippet containing only the Read Beauty operator with its full
         parameter list: semicolon-separated CSV, double-quote quoting, backslash escape,
         windows-1252 encoding, first row not used as names (row 0 annotated as "Name"). -->
    <operator activated="true" class="read_csv" compatibility="8.1.003" expanded="true" height="68" name="Read Beauty" width="90" x="514" y="289">
    <!-- Absolute local Windows path: must be adapted on any other machine. -->
    <parameter key="csv_file" value="F:\CloudStation\Studie Mikael\VAF\DATA\amabea-right-10k.csv"/>
    <parameter key="column_separators" value=";"/>
    <parameter key="trim_lines" value="false"/>
    <parameter key="use_quotes" value="true"/>
    <parameter key="quotes_character" value="&quot;"/>
    <parameter key="escape_character" value="\"/>
    <parameter key="skip_comments" value="false"/>
    <parameter key="comment_characters" value="#"/>
    <parameter key="parse_numbers" value="true"/>
    <parameter key="decimal_character" value="."/>
    <parameter key="grouped_digits" value="false"/>
    <parameter key="grouping_character" value=","/>
    <parameter key="date_format" value=""/>
    <parameter key="first_row_as_names" value="false"/>
    <list key="annotations">
    <parameter key="0" value="Name"/>
    </list>
    <parameter key="time_zone" value="SYSTEM"/>
    <parameter key="locale" value="English (United States)"/>
    <parameter key="encoding" value="windows-1252"/>
    <parameter key="read_all_values_as_polynominal" value="false"/>
    <!-- Declares 11 columns (indices 0 to 10); "overall" is declared as polynominal. -->
    <list key="data_set_meta_data_information">
    <parameter key="0" value="att1.true.real.attribute"/>
    <parameter key="1" value="reviewerID.true.polynominal.attribute"/>
    <parameter key="2" value="summary.true.polynominal.attribute"/>
    <parameter key="3" value="reviewerName.true.polynominal.attribute"/>
    <parameter key="4" value="unixReviewTime.true.real.attribute"/>
    <parameter key="5" value="reviewText.true.polynominal.attribute"/>
    <parameter key="6" value="overall.true.polynominal.attribute"/>
    <parameter key="7" value="helpful.true.real.attribute"/>
    <parameter key="8" value="asin.true.polynominal.attribute"/>
    <parameter key="9" value="X1.true.real.attribute"/>
    <parameter key="10" value="X2.true.real.attribute"/>
    </list>
    <parameter key="read_not_matching_values_as_missings" value="true"/>
    <!-- Both key spellings (datamanagement and data_management) are present in this export. -->
    <parameter key="datamanagement" value="double_array"/>
    <parameter key="data_management" value="auto"/>
    </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
      <!-- Pre-processing (2): recodes the star ratings in "overall" to
           negative / neutral / positive, declares "?" as missing, drops rows
           with a missing rating or review text, keeps reviewText and overall,
           marks overall as the label and converts reviewText to text. -->
      <operator activated="true" class="subprocess" compatibility="8.1.003" expanded="true" height="82" name="Pre-processing (2)" width="90" x="648" y="289">
        <process expanded="true">
          <!-- replace_what is a regular expression: the dot is escaped in every
               Replace operator below so that only the literal rating (e.g. "1.0")
               matches, not "1", any character, "0". -->
          <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 1.0 &gt; negative (2)" width="90" x="45" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="overall"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="1\.0"/>
            <parameter key="replace_by" value="negative"/>
          </operator>
          <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 2.0 &gt; negative (2)" width="90" x="179" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="overall"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="2\.0"/>
            <parameter key="replace_by" value="negative"/>
          </operator>
          <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 3.0 &gt; neutral (2)" width="90" x="313" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="overall"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="3\.0"/>
            <parameter key="replace_by" value="neutral"/>
          </operator>
          <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 4.0 &gt; positive (2)" width="90" x="447" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="overall"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="4\.0"/>
            <parameter key="replace_by" value="positive"/>
          </operator>
          <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 5.0 &gt; positive (2)" width="90" x="581" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="overall"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="5\.0"/>
            <parameter key="replace_by" value="positive"/>
          </operator>
          <!-- Treat the nominal value "?" as a missing value in all attributes. -->
          <operator activated="true" class="declare_missing_value" compatibility="8.1.003" expanded="true" height="82" name="Declare Missing Value (2)" width="90" x="246" y="187">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="mode" value="nominal"/>
            <parameter key="nominal_value" value="?"/>
            <parameter key="expression_value" value=""/>
          </operator>
          <!-- invert_filter removes the rows matched by the filter list. With
               match-any logic (filters_logic_and = false) a row is removed as
               soon as overall OR reviewText is missing; match-all only removed
               rows in which both were missing. -->
          <operator activated="true" class="filter_examples" compatibility="8.1.003" expanded="true" height="103" name="Filter Examples (2)" width="90" x="380" y="187">
            <parameter key="parameter_expression" value=""/>
            <parameter key="condition_class" value="custom_filters"/>
            <parameter key="invert_filter" value="true"/>
            <list key="filters_list">
              <parameter key="filters_entry_key" value="overall.is_missing."/>
              <parameter key="filters_entry_key" value="reviewText.is_missing."/>
            </list>
            <parameter key="filters_logic_and" value="false"/>
            <parameter key="filters_check_metadata" value="true"/>
          </operator>
          <!-- Keep only the review text and the rating. -->
          <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes (2)" width="90" x="514" y="187">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value="reviewText|overall"/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <!-- overall becomes the label attribute. -->
          <operator activated="true" class="set_role" compatibility="8.1.003" expanded="true" height="82" name="Set Role (2)" width="90" x="648" y="187">
            <parameter key="attribute_name" value="overall"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- reviewText is converted from nominal to text. -->
          <operator activated="true" class="nominal_to_text" compatibility="8.1.003" expanded="true" height="82" name="Nominal to Text (2)" width="90" x="782" y="187">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="reviewText"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <!-- Linear chain: in 1, the five Replace operators, Declare Missing
               Value, Filter Examples, Select Attributes, Set Role, Nominal to
               Text, out 1. -->
          <connect from_port="in 1" to_op="Replace 1.0 &gt; negative (2)" to_port="example set input"/>
          <connect from_op="Replace 1.0 &gt; negative (2)" from_port="example set output" to_op="Replace 2.0 &gt; negative (2)" to_port="example set input"/>
          <connect from_op="Replace 2.0 &gt; negative (2)" from_port="example set output" to_op="Replace 3.0 &gt; neutral (2)" to_port="example set input"/>
          <connect from_op="Replace 3.0 &gt; neutral (2)" from_port="example set output" to_op="Replace 4.0 &gt; positive (2)" to_port="example set input"/>
          <connect from_op="Replace 4.0 &gt; positive (2)" from_port="example set output" to_op="Replace 5.0 &gt; positive (2)" to_port="example set input"/>
          <connect from_op="Replace 5.0 &gt; positive (2)" from_port="example set output" to_op="Declare Missing Value (2)" to_port="example set input"/>
          <connect from_op="Declare Missing Value (2)" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
          <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
          <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_op="Nominal to Text (2)" to_port="example set input"/>
          <connect from_op="Nominal to Text (2)" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
      <!-- Split Data: two partitions with ratios 0.7 and 0.3, in that order. -->
      <operator name="Split Data" class="split_data" activated="true"
                compatibility="8.1.003" expanded="true"
                height="103" width="90" x="648" y="442">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.7"/>
          <parameter key="ratio" value="0.3"/>
        </enumeration>
        <parameter key="sampling_type" value="automatic"/>
        <!-- A seed value is stored, but use_local_random_seed is switched off. -->
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
      <!-- Smote Upsampling (2): operator_toolbox:smote with 5 neighbours,
           normalization and class equalization switched on and the minority
           class detected automatically. -->
      <operator name="Smote Upsampling (2)" class="operator_toolbox:smote" activated="true"
                compatibility="1.0.000" expanded="true"
                height="82" width="90" x="782" y="289">
        <parameter key="number_of_neighbours" value="5"/>
        <parameter key="normalize" value="true"/>
        <parameter key="equalize_classes" value="true"/>
        <parameter key="upsampling_size" value="1000"/>
        <parameter key="auto_detect_minority_class" value="true"/>
        <parameter key="round_integers" value="true"/>
        <parameter key="nominal_change_rate" value="0.5"/>
        <!-- A seed value is stored, but use_local_random_seed is switched off. -->
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
      <!-- Sample (2): relative sampling with balance_data enabled. The per-class
           ratios are negative 1.0, positive 0.3 and neutral 1.0. -->
      <operator name="Sample (2)" class="sample" activated="true"
                compatibility="8.1.003" expanded="true"
                height="82" width="90" x="782" y="391">
        <parameter key="sample" value="relative"/>
        <parameter key="balance_data" value="true"/>
        <parameter key="sample_size" value="100"/>
        <parameter key="sample_ratio" value="0.1"/>
        <parameter key="sample_probability" value="0.1"/>
        <!-- NOTE(review): absolute per-class sizes are presumably ignored while
             sample = "relative"; confirm against the Sample operator docs. -->
        <list key="sample_size_per_class">
          <parameter key="positive" value="1000"/>
          <parameter key="negative" value="1000"/>
          <parameter key="neutral" value="1000"/>
        </list>
        <list key="sample_ratio_per_class">
          <parameter key="negative" value="1.0"/>
          <parameter key="positive" value="0.3"/>
          <parameter key="neutral" value="1.0"/>
        </list>
        <list key="sample_probability_per_class"/>
        <!-- A seed value is stored, but use_local_random_seed is switched off. -->
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
      <!-- Pre-processing: recodes the star ratings in "overall" to
           negative / neutral / positive, keeps reviewText and overall, declares
           "?" as missing, drops rows with a missing rating or review text,
           marks overall as the label and converts reviewText to text. -->
      <operator activated="true" class="subprocess" compatibility="8.1.003" expanded="true" height="82" name="Pre-processing" width="90" x="648" y="136">
        <process expanded="true">
          <!-- replace_what is a regular expression: the dot is escaped in every
               Replace operator below so that only the literal rating (e.g. "1.0")
               matches, not "1", any character, "0". -->
          <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 1.0 &gt; negative" width="90" x="179" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="overall"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="1\.0"/>
            <parameter key="replace_by" value="negative"/>
          </operator>
          <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 2.0 &gt; negative" width="90" x="313" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="overall"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="2\.0"/>
            <parameter key="replace_by" value="negative"/>
          </operator>
          <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 3.0 &gt; neutral" width="90" x="447" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="overall"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="3\.0"/>
            <parameter key="replace_by" value="neutral"/>
          </operator>
          <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 4.0 &gt; positive" width="90" x="581" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="overall"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="4\.0"/>
            <parameter key="replace_by" value="positive"/>
          </operator>
          <operator activated="true" class="replace" compatibility="8.1.003" expanded="true" height="82" name="Replace 5.0 &gt; positive" width="90" x="715" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="overall"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="replace_what" value="5\.0"/>
            <parameter key="replace_by" value="positive"/>
          </operator>
          <!-- Keep only the review text and the rating. -->
          <operator activated="true" class="select_attributes" compatibility="8.1.003" expanded="true" height="82" name="Select Attributes" width="90" x="380" y="136">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value="reviewText|overall"/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <!-- Treat the nominal value "?" as a missing value in all attributes. -->
          <operator activated="true" class="declare_missing_value" compatibility="8.1.003" expanded="true" height="82" name="Declare Missing Value" width="90" x="514" y="136">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="mode" value="nominal"/>
            <parameter key="nominal_value" value="?"/>
            <parameter key="expression_value" value=""/>
          </operator>
          <!-- invert_filter removes the rows matched by the filter list. With
               match-any logic (filters_logic_and = false) a row is removed as
               soon as overall OR reviewText is missing; match-all only removed
               rows in which both were missing. -->
          <operator activated="true" class="filter_examples" compatibility="8.1.003" expanded="true" height="103" name="Filter Examples" width="90" x="648" y="136">
            <parameter key="parameter_expression" value=""/>
            <parameter key="condition_class" value="custom_filters"/>
            <parameter key="invert_filter" value="true"/>
            <list key="filters_list">
              <parameter key="filters_entry_key" value="overall.is_missing."/>
              <parameter key="filters_entry_key" value="reviewText.is_missing."/>
            </list>
            <parameter key="filters_logic_and" value="false"/>
            <parameter key="filters_check_metadata" value="true"/>
          </operator>
          <!-- overall becomes the label attribute. -->
          <operator activated="true" class="set_role" compatibility="8.1.003" expanded="true" height="82" name="Set Role" width="90" x="782" y="136">
            <parameter key="attribute_name" value="overall"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- reviewText is converted from nominal to text. -->
          <operator activated="true" class="nominal_to_text" compatibility="8.1.003" expanded="true" height="82" name="Nominal to Text" width="90" x="916" y="136">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="reviewText"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
          </operator>
          <!-- Linear chain: in 1, the five Replace operators, Select Attributes,
               Declare Missing Value, Filter Examples, Set Role, Nominal to Text,
               out 1. -->
          <connect from_port="in 1" to_op="Replace 1.0 &gt; negative" to_port="example set input"/>
          <connect from_op="Replace 1.0 &gt; negative" from_port="example set output" to_op="Replace 2.0 &gt; negative" to_port="example set input"/>
          <connect from_op="Replace 2.0 &gt; negative" from_port="example set output" to_op="Replace 3.0 &gt; neutral" to_port="example set input"/>
          <connect from_op="Replace 3.0 &gt; neutral" from_port="example set output" to_op="Replace 4.0 &gt; positive" to_port="example set input"/>
          <connect from_op="Replace 4.0 &gt; positive" from_port="example set output" to_op="Replace 5.0 &gt; positive" to_port="example set input"/>
          <connect from_op="Replace 5.0 &gt; positive" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
          <connect from_op="Nominal to Text" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
      <!-- Smote Upsampling: operator_toolbox:smote with 5 neighbours,
           normalization and class equalization switched on and the minority
           class detected automatically. -->
      <operator name="Smote Upsampling" class="operator_toolbox:smote" activated="true"
                compatibility="1.0.000" expanded="true"
                height="82" width="90" x="782" y="85">
        <parameter key="number_of_neighbours" value="5"/>
        <parameter key="normalize" value="true"/>
        <parameter key="equalize_classes" value="true"/>
        <parameter key="upsampling_size" value="1000"/>
        <parameter key="auto_detect_minority_class" value="true"/>
        <parameter key="round_integers" value="true"/>
        <parameter key="nominal_change_rate" value="0.5"/>
        <!-- A seed value is stored, but use_local_random_seed is switched off. -->
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
      <!-- Sample: relative sampling with balance_data enabled. The per-class
           ratios are negative 1.0, positive 0.3 and neutral 1.0. -->
      <operator name="Sample" class="sample" activated="true"
                compatibility="8.1.003" expanded="true"
                height="82" width="90" x="782" y="187">
        <parameter key="sample" value="relative"/>
        <parameter key="balance_data" value="true"/>
        <parameter key="sample_size" value="100"/>
        <parameter key="sample_ratio" value="0.1"/>
        <parameter key="sample_probability" value="0.1"/>
        <!-- NOTE(review): absolute per-class sizes are presumably ignored while
             sample = "relative"; confirm against the Sample operator docs. -->
        <list key="sample_size_per_class">
          <parameter key="positive" value="1000"/>
          <parameter key="negative" value="1000"/>
          <parameter key="neutral" value="1000"/>
        </list>
        <list key="sample_ratio_per_class">
          <parameter key="negative" value="1.0"/>
          <parameter key="positive" value="0.3"/>
          <parameter key="neutral" value="1.0"/>
        </list>
        <list key="sample_probability_per_class"/>
        <!-- A seed value is stored, but use_local_random_seed is switched off. -->
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
      </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
      <!-- Process Documents from Data: builds a TF-IDF word vector, pruning
           terms by percentage (below 1.0, above 30.0); the text attribute is
           not kept.
           Token chain: Tokenize, Transform Cases, Filter Stopwords (English),
           Stem (Porter), Filter Tokens (by Length). Stopwords are removed
           before stemming because stemmed forms such as "thi" or "wa" no longer
           equal the dictionary words in a stopword list; lower-casing first
           avoids depending on how the stopword filter treats capitalised
           tokens. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="916" y="187">
        <parameter key="create_word_vector" value="true"/>
        <parameter key="vector_creation" value="TF-IDF"/>
        <parameter key="add_meta_information" value="true"/>
        <parameter key="keep_text" value="false"/>
        <parameter key="prune_method" value="percentual"/>
        <parameter key="prune_below_percent" value="1.0"/>
        <parameter key="prune_above_percent" value="30.0"/>
        <parameter key="prune_below_rank" value="0.05"/>
        <parameter key="prune_above_rank" value="0.95"/>
        <parameter key="datamanagement" value="double_sparse_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="select_attributes_and_weights" value="false"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <!-- 1. Split the text at non-letter characters. -->
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="179" y="34">
            <parameter key="mode" value="non letters"/>
            <parameter key="characters" value=".:"/>
            <parameter key="language" value="English"/>
            <parameter key="max_token_length" value="3"/>
          </operator>
          <!-- 2. Lower-case every token. -->
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="313" y="34">
            <parameter key="transform_to" value="lower case"/>
          </operator>
          <!-- 3. Remove English stopwords while tokens are still whole words. -->
          <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="447" y="34"/>
          <!-- 4. Reduce the remaining tokens to their Porter stems. -->
          <operator activated="true" class="text:stem_porter" compatibility="8.1.000" expanded="true" height="68" name="Stem (Porter)" width="90" x="581" y="34"/>
          <!-- 5. Keep tokens of 2 to 15 characters. -->
          <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="782" y="34">
            <parameter key="min_chars" value="2"/>
            <parameter key="max_chars" value="15"/>
          </operator>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (Porter)" to_port="document"/>
          <connect from_op="Stem (Porter)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
      <!-- Process Documents from Data (2): builds a TF-IDF word vector, pruning
           terms by percentage (below 1.0, above 30.0); the text attribute is
           not kept.
           Token chain: Tokenize (2), Transform Cases (2), Filter Stopwords (2),
           Stem (2), Filter Tokens (2). Stopwords are removed before stemming
           because stemmed forms such as "thi" or "wa" no longer equal the
           dictionary words in a stopword list; lower-casing first avoids
           depending on how the stopword filter treats capitalised tokens. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (2)" width="90" x="916" y="340">
        <parameter key="create_word_vector" value="true"/>
        <parameter key="vector_creation" value="TF-IDF"/>
        <parameter key="add_meta_information" value="true"/>
        <parameter key="keep_text" value="false"/>
        <parameter key="prune_method" value="percentual"/>
        <parameter key="prune_below_percent" value="1.0"/>
        <parameter key="prune_above_percent" value="30.0"/>
        <parameter key="prune_below_rank" value="0.05"/>
        <parameter key="prune_above_rank" value="0.95"/>
        <parameter key="datamanagement" value="double_sparse_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="select_attributes_and_weights" value="false"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <!-- 1. Split the text at non-letter characters. -->
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="179" y="34">
            <parameter key="mode" value="non letters"/>
            <parameter key="characters" value=".:"/>
            <parameter key="language" value="English"/>
            <parameter key="max_token_length" value="3"/>
          </operator>
          <!-- 2. Lower-case every token. -->
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases (2)" width="90" x="380" y="34">
            <parameter key="transform_to" value="lower case"/>
          </operator>
          <!-- 3. Remove English stopwords while tokens are still whole words. -->
          <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (2)" width="90" x="581" y="34"/>
          <!-- 4. Reduce the remaining tokens to their Porter stems. -->
          <operator activated="true" class="text:stem_porter" compatibility="8.1.000" expanded="true" height="68" name="Stem (2)" width="90" x="715" y="34"/>
          <!-- 5. Keep tokens of 2 to 15 characters. -->
          <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (2)" width="90" x="849" y="34">
            <parameter key="min_chars" value="2"/>
            <parameter key="max_chars" value="15"/>
          </operator>
          <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
          <connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Stopwords (2)" to_port="document"/>
          <connect from_op="Filter Stopwords (2)" from_port="document" to_op="Stem (2)" to_port="document"/>
          <connect from_op="Stem (2)" from_port="document" to_op="Filter Tokens (2)" to_port="document"/>
          <connect from_op="Filter Tokens (2)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
    <operator activated="true" class="concurrency:cross_validation" compatibility="8.1.003" expanded="true" height="145" name="Cross Validation (2)" width="90" x="1184" y="289">
    <parameter key="split_on_batch_attribute" value="false"/>
    <parameter key="leave_one_out" value="false"/>
    <parameter key="number_of_folds" value="2"/>
    <parameter key="sampling_type" value="automatic"/>
    <parameter key="use_local_random_seed" value="false"/>
    <parameter key="local_random_seed" value="1992"/>
    <parameter key="enable_parallel_execution" value="true"/>
    <process expanded="true">
    <operator activated="false" class="naive_bayes" compatibility="8.1.003" expanded="true" height="82" name="Naive Bayes (2)" width="90" x="179" y="238">
    <parameter key="laplace_correction" value="true"/>
    </operator>
    <operator activated="false" class="h2o:generalized_linear_model" compatibility="7.6.001" expanded="true" height="124" name="Generalized Linear Model (2)" width="90" x="179" y="340">
    <parameter key="family" value="AUTO"/>
    <parameter key="link" value="family_default"/>
    <parameter key="solver" value="AUTO"/>
    <parameter key="reproducible" value="false"/>
    <parameter key="maximum_number_of_threads" value="4"/>
    <parameter key="use_regularization" value="true"/>
    <parameter key="lambda_search" value="false"/>
    <parameter key="number_of_lambdas" value="0"/>
    <parameter key="lambda_min_ratio" value="0.0"/>
    <parameter key="early_stopping" value="true"/>
    <parameter key="stopping_rounds" value="3"/>
    <parameter key="stopping_tolerance" value="0.001"/>
    <parameter key="standardize" value="true"/>
    <parameter key="non-negative_coefficients" value="false"/>
    <parameter key="add_intercept" value="true"/>
    <parameter key="compute_p-values" value="false"/>
    <parameter key="remove_collinear_columns" value="false"/>
    <parameter key="missing_values_handling" value="MeanImputation"/>
    <parameter key="max_iterations" value="0"/>
    <parameter key="specify_beta_constraints" value="false"/>
    <list key="beta_constraints"/>
    <parameter key="max_runtime_seconds" value="0"/>
    <list key="expert_parameters"/>
    </operator>
    <operator activated="true" class="singular_value_decomposition" compatibility="8.1.003" expanded="true" height="103" name="SVD (2)" width="90" x="45" y="34">
    <parameter key="dimensionality_reduction" value="fixed number"/>
    <parameter key="percentage_threshold" value="0.95"/>
    <parameter key="dimensions" value="100"/>
    </operator>
    <operator activated="true" class="k_nn" compatibility="8.1.003" expanded="true" height="82" name="k-NN (2)" width="90" x="179" y="34">
    <parameter key="k" value="1"/>
    <parameter key="weighted_vote" value="false"/>
    <parameter key="measure_types" value="MixedMeasures"/>
    <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
    <parameter key="nominal_measure" value="NominalDistance"/>
    <parameter key="numerical_measure" value="EuclideanDistance"/>
    <parameter key="divergence" value="GeneralizedIDivergence"/>
    <parameter key="kernel_type" value="radial"/>
    <parameter key="kernel_gamma" value="1.0"/>
    <parameter key="kernel_sigma1" value="1.0"/>
    <parameter key="kernel_sigma2" value="0.0"/>
    <parameter key="kernel_sigma3" value="2.0"/>
    <parameter key="kernel_degree" value="3.0"/>
    <parameter key="kernel_shift" value="1.0"/>
    <parameter key="kernel_a" value="1.0"/>
    <parameter key="kernel_b" value="0.0"/>
    </operator>
    <connect from_port="training set" to_op="SVD (2)" to_port="example set input"/>
    <connect from_op="SVD (2)" from_port="example set output" to_op="k-NN (2)" to_port="training set"/>
    <connect from_op="SVD (2)" from_port="preprocessing model" to_port="through 1"/>
    <connect from_op="k-NN (2)" from_port="model" to_port="model"/>
    <portSpacing port="source_training set" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
    <portSpacing port="sink_through 2" spacing="0"/>
    </process>
    <process expanded="true">
    <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (6)" width="90" x="45" y="136">
    <list key="application_parameters"/>
    <parameter key="create_view" value="false"/>
    </operator>
    <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (4)" width="90" x="112" y="34">
    <list key="application_parameters"/>
    <parameter key="create_view" value="false"/>
    </operator>
    <operator activated="true" class="performance_classification" compatibility="8.1.003" expanded="true" height="82" name="Performance training (2)" width="90" x="246" y="34">
    <parameter key="main_criterion" value="accuracy"/>
    <parameter key="accuracy" value="true"/>
    <parameter key="classification_error" value="true"/>
    <parameter key="kappa" value="false"/>
    <parameter key="weighted_mean_recall" value="true"/>
    <parameter key="weighted_mean_precision" value="true"/>
    <parameter key="spearman_rho" value="false"/>
    <parameter key="kendall_tau" value="false"/>
    <parameter key="absolute_error" value="false"/>
    <parameter key="relative_error" value="false"/>
    <parameter key="relative_error_lenient" value="false"/>
    <parameter key="relative_error_strict" value="false"/>
    <parameter key="normalized_absolute_error" value="false"/>
    <parameter key="root_mean_squared_error" value="false"/>
    <parameter key="root_relative_squared_error" value="false"/>
    <parameter key="squared_error" value="false"/>
    <parameter key="correlation" value="false"/>
    <parameter key="squared_correlation" value="false"/>
    <parameter key="cross-entropy" value="false"/>
    <parameter key="margin" value="false"/>
    <parameter key="soft_margin_loss" value="false"/>
    <parameter key="logistic_loss" value="true"/>
    <parameter key="skip_undefined_labels" value="true"/>
    <parameter key="use_example_weights" value="true"/>
    <list key="class_weights"/>
    </operator>
    <connect from_port="model" to_op="Apply Model (4)" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model (6)" to_port="unlabelled data"/>
    <connect from_port="through 1" to_op="Apply Model (6)" to_port="model"/>
    <connect from_op="Apply Model (6)" from_port="labelled data" to_op="Apply Model (4)" to_port="unlabelled data"/>
    <connect from_op="Apply Model (4)" from_port="labelled data" to_op="Performance training (2)" to_port="labelled data"/>
    <connect from_op="Performance training (2)" from_port="performance" to_port="performance 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="source_through 2" spacing="0"/>
    <portSpacing port="sink_test set results" spacing="0"/>
    <portSpacing port="sink_performance 1" spacing="0"/>
    <portSpacing port="sink_performance 2" spacing="0"/>
    </process>
    </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?>
    <process version="8.1.003">
      <!-- Cross Validation fragment (2 folds). The training side fits an SVD and a k-NN learner;
           the testing side re-applies the SVD preprocessing model before scoring with k-NN. -->
      <operator activated="true" class="concurrency:cross_validation" compatibility="8.1.003" expanded="true" height="145" name="Cross Validation" width="90" x="1184" y="136">
        <parameter key="split_on_batch_attribute" value="false"/>
        <parameter key="leave_one_out" value="false"/>
        <parameter key="number_of_folds" value="2"/>
        <parameter key="sampling_type" value="automatic"/>
        <parameter key="use_local_random_seed" value="false"/>
        <parameter key="local_random_seed" value="1992"/>
        <parameter key="enable_parallel_execution" value="true"/>
        <!-- Training subprocess -->
        <process expanded="true">
          <!-- Naive Bayes and the H2O GLM are deactivated (activated="false") and not connected. -->
          <operator activated="false" class="naive_bayes" compatibility="8.1.003" expanded="true" height="82" name="Naive Bayes" width="90" x="179" y="238">
            <parameter key="laplace_correction" value="true"/>
          </operator>
          <operator activated="false" class="h2o:generalized_linear_model" compatibility="7.6.001" expanded="true" height="124" name="Generalized Linear Model" width="90" x="179" y="340">
            <parameter key="family" value="AUTO"/>
            <parameter key="link" value="family_default"/>
            <parameter key="solver" value="AUTO"/>
            <parameter key="reproducible" value="false"/>
            <parameter key="maximum_number_of_threads" value="4"/>
            <parameter key="use_regularization" value="true"/>
            <parameter key="lambda_search" value="false"/>
            <parameter key="number_of_lambdas" value="0"/>
            <parameter key="lambda_min_ratio" value="0.0"/>
            <parameter key="early_stopping" value="true"/>
            <parameter key="stopping_rounds" value="3"/>
            <parameter key="stopping_tolerance" value="0.001"/>
            <parameter key="standardize" value="true"/>
            <parameter key="non-negative_coefficients" value="false"/>
            <parameter key="add_intercept" value="true"/>
            <parameter key="compute_p-values" value="false"/>
            <parameter key="remove_collinear_columns" value="false"/>
            <parameter key="missing_values_handling" value="MeanImputation"/>
            <parameter key="max_iterations" value="0"/>
            <parameter key="specify_beta_constraints" value="false"/>
            <list key="beta_constraints"/>
            <parameter key="max_runtime_seconds" value="0"/>
            <list key="expert_parameters"/>
          </operator>
          <!-- SVD fitted on the training fold only, reduced to a fixed 100 dimensions. -->
          <operator activated="true" class="singular_value_decomposition" compatibility="8.1.003" expanded="true" height="103" name="SVD" width="90" x="45" y="34">
            <parameter key="dimensionality_reduction" value="fixed number"/>
            <parameter key="percentage_threshold" value="0.95"/>
            <parameter key="dimensions" value="100"/>
          </operator>
          <!-- 1-nearest-neighbour learner trained on the SVD output. -->
          <operator activated="true" class="k_nn" compatibility="8.1.003" expanded="true" height="82" name="k-NN" width="90" x="179" y="34">
            <parameter key="k" value="1"/>
            <parameter key="weighted_vote" value="false"/>
            <parameter key="measure_types" value="MixedMeasures"/>
            <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
            <parameter key="nominal_measure" value="NominalDistance"/>
            <parameter key="numerical_measure" value="EuclideanDistance"/>
            <parameter key="divergence" value="GeneralizedIDivergence"/>
            <parameter key="kernel_type" value="radial"/>
            <parameter key="kernel_gamma" value="1.0"/>
            <parameter key="kernel_sigma1" value="1.0"/>
            <parameter key="kernel_sigma2" value="0.0"/>
            <parameter key="kernel_sigma3" value="2.0"/>
            <parameter key="kernel_degree" value="3.0"/>
            <parameter key="kernel_shift" value="1.0"/>
            <parameter key="kernel_a" value="1.0"/>
            <parameter key="kernel_b" value="0.0"/>
          </operator>
          <connect from_port="training set" to_op="SVD" to_port="example set input"/>
          <connect from_op="SVD" from_port="example set output" to_op="k-NN" to_port="training set"/>
          <!-- The SVD preprocessing model leaves through "through 1" so the testing side can reuse it. -->
          <connect from_op="SVD" from_port="preprocessing model" to_port="through 1"/>
          <connect from_op="k-NN" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <portSpacing port="sink_through 2" spacing="0"/>
        </process>
        <!-- Testing subprocess -->
        <process expanded="true">
          <!-- Apply Model (5) receives the SVD preprocessing model and the test fold. -->
          <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (5)" width="90" x="45" y="85">
            <list key="application_parameters"/>
            <parameter key="create_view" value="false"/>
          </operator>
          <!-- Apply Model receives the k-NN model and the output of Apply Model (5). -->
          <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model" width="90" x="179" y="34">
            <list key="application_parameters"/>
            <parameter key="create_view" value="false"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="8.1.003" expanded="true" height="82" name="Performance training" width="90" x="313" y="34">
            <parameter key="main_criterion" value="accuracy"/>
            <parameter key="accuracy" value="true"/>
            <parameter key="classification_error" value="true"/>
            <parameter key="kappa" value="false"/>
            <parameter key="weighted_mean_recall" value="true"/>
            <parameter key="weighted_mean_precision" value="true"/>
            <parameter key="spearman_rho" value="false"/>
            <parameter key="kendall_tau" value="false"/>
            <parameter key="absolute_error" value="false"/>
            <parameter key="relative_error" value="false"/>
            <parameter key="relative_error_lenient" value="false"/>
            <parameter key="relative_error_strict" value="false"/>
            <parameter key="normalized_absolute_error" value="false"/>
            <parameter key="root_mean_squared_error" value="false"/>
            <parameter key="root_relative_squared_error" value="false"/>
            <parameter key="squared_error" value="false"/>
            <parameter key="correlation" value="false"/>
            <parameter key="squared_correlation" value="false"/>
            <parameter key="cross-entropy" value="false"/>
            <parameter key="margin" value="false"/>
            <parameter key="soft_margin_loss" value="false"/>
            <parameter key="logistic_loss" value="true"/>
            <parameter key="skip_undefined_labels" value="true"/>
            <parameter key="use_example_weights" value="true"/>
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (5)" to_port="unlabelled data"/>
          <connect from_port="through 1" to_op="Apply Model (5)" to_port="model"/>
          <connect from_op="Apply Model (5)" from_port="labelled data" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance training" to_port="labelled data"/>
          <connect from_op="Performance training" from_port="performance" to_port="performance 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="source_through 2" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
        </process>
      </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?>
    <process version="8.1.003">
      <!-- Stand-alone Group Models operator; this fragment sets no parameters and no connections. -->
      <operator activated="true" class="group_models" compatibility="8.1.003" expanded="true" height="103" name="Group Models" width="90" x="1385" y="238"/>
    </process>
    <?xml version="1.0" encoding="UTF-8"?>
    <process version="8.1.003">
      <!-- Builds a TF-IDF word vector with percentual pruning (below 1.0 %, above 30.0 %).
           Token pipeline: Tokenize, Stem, Filter Stopwords, Transform Cases, Filter Tokens. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (3)" width="90" x="916" y="493">
        <parameter key="create_word_vector" value="true"/>
        <parameter key="vector_creation" value="TF-IDF"/>
        <parameter key="add_meta_information" value="true"/>
        <parameter key="keep_text" value="false"/>
        <parameter key="prune_method" value="percentual"/>
        <parameter key="prune_below_percent" value="1.0"/>
        <parameter key="prune_above_percent" value="30.0"/>
        <parameter key="prune_below_rank" value="0.05"/>
        <parameter key="prune_above_rank" value="0.95"/>
        <parameter key="datamanagement" value="double_sparse_array"/>
        <parameter key="data_management" value="auto"/>
        <parameter key="select_attributes_and_weights" value="false"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <!-- NOTE(review): stemming and stopword filtering run before lower-casing here.
               If stopword matching is case-sensitive, lower-casing first may filter more tokens;
               confirm, and keep the order identical in every text pipeline of the workflow. -->
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize (3)" width="90" x="179" y="34">
            <parameter key="mode" value="non letters"/>
            <parameter key="characters" value=".:"/>
            <parameter key="language" value="English"/>
            <parameter key="max_token_length" value="3"/>
          </operator>
          <operator activated="true" class="text:stem_porter" compatibility="8.1.000" expanded="true" height="68" name="Stem (3)" width="90" x="380" y="34"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (3)" width="90" x="581" y="34"/>
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases (3)" width="90" x="715" y="34">
            <parameter key="transform_to" value="lower case"/>
          </operator>
          <!-- Keeps tokens of 2 to 15 characters. -->
          <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (3)" width="90" x="849" y="34">
            <parameter key="min_chars" value="2"/>
            <parameter key="max_chars" value="15"/>
          </operator>
          <connect from_port="document" to_op="Tokenize (3)" to_port="document"/>
          <connect from_op="Tokenize (3)" from_port="document" to_op="Stem (3)" to_port="document"/>
          <connect from_op="Stem (3)" from_port="document" to_op="Filter Stopwords (3)" to_port="document"/>
          <connect from_op="Filter Stopwords (3)" from_port="document" to_op="Transform Cases (3)" to_port="document"/>
          <connect from_op="Transform Cases (3)" from_port="document" to_op="Filter Tokens (3)" to_port="document"/>
          <connect from_op="Filter Tokens (3)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?>
    <process version="8.1.003">
      <!-- Stand-alone SVD operator configured for a fixed 100 output dimensions;
           this fragment contains no connections. -->
      <operator activated="true" class="singular_value_decomposition" compatibility="8.1.003" expanded="true" height="103" name="SVD (3)" width="90" x="1117" y="493">
        <parameter key="dimensionality_reduction" value="fixed number"/>
        <parameter key="percentage_threshold" value="0.95"/>
        <parameter key="dimensions" value="100"/>
      </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?>
    <process version="8.1.003">
      <!-- Stand-alone Apply Model operator with no application parameters and create_view disabled;
           this fragment contains no connections. -->
      <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model (3)" width="90" x="1519" y="493">
        <list key="application_parameters"/>
        <parameter key="create_view" value="false"/>
      </operator>
    </process>
    <?xml version="1.0" encoding="UTF-8"?>
    <process version="8.1.003">
      <!-- Classification performance operator. Enabled criteria: accuracy, classification_error,
           weighted_mean_recall, weighted_mean_precision and logistic_loss; main_criterion is "first". -->
      <operator activated="true" class="performance_classification" compatibility="8.1.003" expanded="true" height="82" name="Test Performance" width="90" x="1586" y="340">
        <parameter key="main_criterion" value="first"/>
        <parameter key="accuracy" value="true"/>
        <parameter key="classification_error" value="true"/>
        <parameter key="kappa" value="false"/>
        <parameter key="weighted_mean_recall" value="true"/>
        <parameter key="weighted_mean_precision" value="true"/>
        <parameter key="spearman_rho" value="false"/>
        <parameter key="kendall_tau" value="false"/>
        <parameter key="absolute_error" value="false"/>
        <parameter key="relative_error" value="false"/>
        <parameter key="relative_error_lenient" value="false"/>
        <parameter key="relative_error_strict" value="false"/>
        <parameter key="normalized_absolute_error" value="false"/>
        <parameter key="root_mean_squared_error" value="false"/>
        <parameter key="root_relative_squared_error" value="false"/>
        <parameter key="squared_error" value="false"/>
        <parameter key="correlation" value="false"/>
        <parameter key="squared_correlation" value="false"/>
        <parameter key="cross-entropy" value="false"/>
        <parameter key="margin" value="false"/>
        <parameter key="soft_margin_loss" value="false"/>
        <parameter key="logistic_loss" value="true"/>
        <parameter key="skip_undefined_labels" value="true"/>
        <parameter key="use_example_weights" value="true"/>
        <list key="class_weights"/>
      </operator>
    </process>

    Hi Lionel, 

    Thanks for your response and work.

    The model that you created is not transfer learning. I need to combine the full source dataset with only 70% of the target dataset. You inspired me by putting the SVD and Apply Model operators inside the Cross Validation.

     

    I have created a new model with SVD/Apply Model inside Cross Validation, but it gives a mean recall of 31.95% and a precision of 32.68%, which is worse than the normal process.

     

    I think the problem lies in combining the preprocessing models. I guess what I want to do is not possible with RapidMiner?

  • pschlunder
    pschlunder New Altair Community Member

    Hi @me1234,

    if you train a model on one data set and want to score another data set, you need to ensure that the data schema is the same: the number of attributes, their types, and their names all have to match.

     

    If you want to try transfer learning outside of deep learning using neural networks you could do it like the following:

    1. Make sure all data you want to use has the same schema as described above

    2. Apply multiple models one after another, i.e. hierarchical learning (which is in fact deep learning, just without the neural networks here).

    3. Keep the first x models the same, and retrain only the last one(s) with your new data from a different domain.

     

    Here's a simple process sketch to help illustrate the concept:

    <?xml version="1.0" encoding="UTF-8"?><process version="8.1.003">
      <!-- Sketch of a two-"layer" stacked learner: a first Decision Tree scores held-out data,
           its prediction and confidences are turned into regular attributes, and a second
           Decision Tree is trained on that pre-scored data. -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
        <process expanded="true">
          <operator activated="true" class="retrieve" compatibility="8.1.003" expanded="true" height="68" name="Golf" width="90" x="45" y="238">
            <parameter key="repository_entry" value="//Samples/data/Golf"/>
          </operator>
          <operator activated="true" class="subprocess" compatibility="8.1.003" expanded="true" height="82" name="Subprocess" width="90" x="112" y="391">
            <process expanded="true">
              <!-- Pass the data straight through until consistency operators are inserted here;
                   without this connection "out 1" is not fed and Split Data has no input. -->
              <connect from_port="in 1" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <description align="center" color="transparent" colored="false" width="126">Consistency layer. Use this subprocess to rename attributes, convert their types, e.t.c. to ensure the same data structure</description>
          </operator>
          <!-- Partition 1 (ratio 0.8) trains the first tree; partition 2 (ratio 0.2) is scored by it. -->
          <operator activated="true" class="split_data" compatibility="8.1.003" expanded="true" height="103" name="Split Data" width="90" x="179" y="238">
            <enumeration key="partitions">
              <parameter key="ratio" value="0.8"/>
              <parameter key="ratio" value="0.2"/>
            </enumeration>
          </operator>
          <operator activated="true" class="concurrency:parallel_decision_tree" compatibility="8.1.003" expanded="true" height="103" name="Decision Tree" width="90" x="313" y="136"/>
          <operator activated="true" class="apply_model" compatibility="8.1.003" expanded="true" height="82" name="Apply Model" width="90" x="447" y="238">
            <list key="application_parameters"/>
          </operator>
          <!-- Changes the role of prediction(Play); confidence(yes) and confidence(no) are set to
               regular so the second learner can use them as inputs. -->
          <operator activated="true" class="set_role" compatibility="8.1.003" expanded="true" height="82" name="Set Role" width="90" x="581" y="238">
            <parameter key="attribute_name" value="prediction(Play)"/>
            <list key="set_additional_roles">
              <parameter key="confidence(yes)" value="regular"/>
              <parameter key="confidence(no)" value="regular"/>
            </list>
          </operator>
          <operator activated="true" class="concurrency:parallel_decision_tree" compatibility="8.1.003" expanded="true" height="103" name="Decision Tree (2)" width="90" x="782" y="238"/>
          <connect from_op="Golf" from_port="output" to_op="Subprocess" to_port="in 1"/>
          <connect from_op="Subprocess" from_port="out 1" to_op="Split Data" to_port="example set"/>
          <connect from_op="Split Data" from_port="partition 1" to_op="Decision Tree" to_port="training set"/>
          <connect from_op="Split Data" from_port="partition 2" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Decision Tree" from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Decision Tree (2)" to_port="training set"/>
          <connect from_op="Decision Tree (2)" from_port="model" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="36"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <description align="center" color="green" colored="true" height="293" resized="true" width="289" x="277" y="75">Create first &amp;quot;layer&amp;quot;, potentially the one you want to keep that way</description>
          <description align="center" color="purple" colored="true" height="294" resized="true" width="143" x="569" y="74">Set the special roles of the attributes from the prediction to regular, to reuse the prediction power of the previous model as input for the next learner</description>
          <description align="center" color="yellow" colored="false" height="296" resized="true" width="268" x="720" y="73">Train a second learner as a second &amp;quot;layer&amp;quot; for specialisation on the pre-scored data. This one will be retrained when you use it with new data.</description>
        </process>
      </operator>
    </process>

     

    Hope this helps,

    Philipp