Altair RISE
A program to recognize and reward our most engaged community members
Nominate Yourself Now!
Home
Discussions
Community Q&A
Error while apply model for unseen data
HeikoeWin786
Hello all,
I am testing the NBC in cross-validation to understand throughly.
I am aware that the train dataset and test dataset needs to follow the same data pre-processing process.
However, when I take the exe of cross-validation and input the exe of pre-processed data (from unseen dataset), I got the error below.
Could you please advise me where I made a mistake here?
I had attached my rmp file and error message for your kind reference.
Thanks much in advance.
Heikoe
Find more posts tagged with
AI Studio
Errors
Accepted answers
MartinLiebig
Hi,
you ran into the "WordList Issue" outlined here:
https://community.rapidminer.com/discussion/50278/text-mining-and-the-word-list
Attached is a process which passes the word list through and should work.
<?xml version="1.0" encoding="UTF-8"?><process version="9.7.002">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="9.7.002" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<operator activated="true" class="read_excel" compatibility="9.7.002" expanded="true" height="68" name="Read Excel" width="90" x="581" y="187">
<parameter key="excel_file" value="C:UsersMYATDocuments1.0 Master StudyLearningData Sciencesubjectivity_Dataset_Validated.xlsx"/>
<parameter key="sheet_selection" value="sheet number"/>
<parameter key="sheet_number" value="1"/>
<parameter key="imported_cell_range" value="A1:F10485776"/>
<parameter key="encoding" value="SYSTEM"/>
<parameter key="first_row_as_names" value="true"/>
<list key="annotations"/>
<parameter key="date_format" value=""/>
<parameter key="time_zone" value="SYSTEM"/>
<parameter key="locale" value="English (United States)"/>
<parameter key="read_all_values_as_polynominal" value="false"/>
<list key="data_set_meta_data_information">
<parameter key="0" value="customer_review.true.polynominal.attribute"/>
<parameter key="1" value="confident_score.true.binominal.label"/>
<parameter key="2" value="Airlines.true.polynominal.attribute"/>
<parameter key="3" value="TB_label.false.polynominal.attribute"/>
<parameter key="4" value="TB_Opinion.false.polynominal.attribute"/>
<parameter key="5" value="Vader_Score.false.polynominal.attribute"/>
</list>
<parameter key="read_not_matching_values_as_missings" value="false"/>
<parameter key="datamanagement" value="double_array"/>
<parameter key="data_management" value="auto"/>
</operator>
<operator activated="true" class="split_data" compatibility="9.7.002" expanded="true" height="103" name="Split Data" width="90" x="715" y="187">
<enumeration key="partitions">
<parameter key="ratio" value="0.75"/>
<parameter key="ratio" value="0.25"/>
</enumeration>
<parameter key="sampling_type" value="stratified sampling"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
</operator>
<operator activated="true" class="store" compatibility="9.7.002" expanded="true" height="68" name="Store (3)" width="90" x="916" y="187">
<parameter key="repository_entry" value="../Data/unseen_data"/>
</operator>
<operator activated="true" class="nominal_to_text" compatibility="9.7.002" expanded="true" height="82" name="Nominal to Text (2)" width="90" x="1117" y="85">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="customer_review"/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="nominal"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="file_path"/>
<parameter key="block_type" value="single_value"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="single_value"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<operator activated="true" class="nominal_to_text" compatibility="9.7.002" expanded="true" height="82" name="Nominal to Text" width="90" x="246" y="493">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="customer_review"/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="nominal"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="file_path"/>
<parameter key="block_type" value="single_value"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="single_value"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<operator activated="true" class="text:process_document_from_data" compatibility="9.3.001" expanded="true" height="82" name="Process Documents from Data" width="90" x="849" y="442">
<parameter key="create_word_vector" value="true"/>
<parameter key="vector_creation" value="TF-IDF"/>
<parameter key="add_meta_information" value="true"/>
<parameter key="keep_text" value="false"/>
<parameter key="prune_method" value="none"/>
<parameter key="prune_below_percent" value="3.0"/>
<parameter key="prune_above_percent" value="30.0"/>
<parameter key="prune_below_rank" value="0.05"/>
<parameter key="prune_above_rank" value="0.95"/>
<parameter key="datamanagement" value="double_sparse_array"/>
<parameter key="data_management" value="auto"/>
<parameter key="select_attributes_and_weights" value="false"/>
<list key="specify_weights"/>
<process expanded="true">
<operator activated="true" class="text:tokenize" compatibility="9.3.001" expanded="true" height="68" name="Tokenize" width="90" x="45" y="34">
<parameter key="mode" value="non letters"/>
<parameter key="characters" value=".:"/>
<parameter key="language" value="English"/>
<parameter key="max_token_length" value="3"/>
</operator>
<operator activated="true" class="text:transform_cases" compatibility="9.3.001" expanded="true" height="68" name="Transform Cases" width="90" x="179" y="34">
<parameter key="transform_to" value="lower case"/>
</operator>
<operator activated="true" class="text:filter_stopwords_english" compatibility="9.3.001" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="313" y="34"/>
<operator activated="true" class="text:stem_porter" compatibility="9.3.001" expanded="true" height="68" name="Stem (Porter)" width="90" x="447" y="34"/>
<operator activated="true" class="text:filter_by_length" compatibility="9.3.001" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="581" y="34">
<parameter key="min_chars" value="3"/>
<parameter key="max_chars" value="25"/>
</operator>
<connect from_port="document" to_op="Tokenize" to_port="document"/>
<connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
<connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
<connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (Porter)" to_port="document"/>
<connect from_op="Stem (Porter)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
<connect from_op="Filter Tokens (by Length)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="text:process_document_from_data" compatibility="9.3.001" expanded="true" height="82" name="Process Documents from Data (2)" width="90" x="1318" y="187">
<parameter key="create_word_vector" value="true"/>
<parameter key="vector_creation" value="TF-IDF"/>
<parameter key="add_meta_information" value="true"/>
<parameter key="keep_text" value="false"/>
<parameter key="prune_method" value="none"/>
<parameter key="prune_below_percent" value="3.0"/>
<parameter key="prune_above_percent" value="30.0"/>
<parameter key="prune_below_rank" value="0.05"/>
<parameter key="prune_above_rank" value="0.95"/>
<parameter key="datamanagement" value="double_sparse_array"/>
<parameter key="data_management" value="auto"/>
<parameter key="select_attributes_and_weights" value="false"/>
<list key="specify_weights"/>
<process expanded="true">
<operator activated="true" class="text:tokenize" compatibility="9.3.001" expanded="true" height="68" name="Tokenize (2)" width="90" x="45" y="34">
<parameter key="mode" value="non letters"/>
<parameter key="characters" value=".:"/>
<parameter key="language" value="English"/>
<parameter key="max_token_length" value="3"/>
</operator>
<operator activated="true" class="text:transform_cases" compatibility="9.3.001" expanded="true" height="68" name="Transform Cases (2)" width="90" x="179" y="34">
<parameter key="transform_to" value="lower case"/>
</operator>
<operator activated="true" class="text:filter_stopwords_english" compatibility="9.3.001" expanded="true" height="68" name="Filter Stopwords (English) (2)" width="90" x="313" y="34"/>
<operator activated="true" class="text:stem_porter" compatibility="9.3.001" expanded="true" height="68" name="Stem (Porter) (2)" width="90" x="447" y="34"/>
<operator activated="true" class="text:filter_by_length" compatibility="9.3.001" expanded="true" height="68" name="Filter Tokens (by Length) (2)" width="90" x="581" y="34">
<parameter key="min_chars" value="3"/>
<parameter key="max_chars" value="25"/>
</operator>
<connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
<connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
<connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Stopwords (English) (2)" to_port="document"/>
<connect from_op="Filter Stopwords (English) (2)" from_port="document" to_op="Stem (Porter) (2)" to_port="document"/>
<connect from_op="Stem (Porter) (2)" from_port="document" to_op="Filter Tokens (by Length) (2)" to_port="document"/>
<connect from_op="Filter Tokens (by Length) (2)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="concurrency:cross_validation" compatibility="9.7.002" expanded="true" height="145" name="Cross Validation" width="90" x="1050" y="442">
<parameter key="split_on_batch_attribute" value="false"/>
<parameter key="leave_one_out" value="false"/>
<parameter key="number_of_folds" value="10"/>
<parameter key="sampling_type" value="automatic"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
<parameter key="enable_parallel_execution" value="true"/>
<process expanded="true">
<operator activated="true" class="naive_bayes" compatibility="9.7.002" expanded="true" height="82" name="Naive Bayes" width="90" x="45" y="34">
<parameter key="laplace_correction" value="true"/>
</operator>
<connect from_port="training set" to_op="Naive Bayes" to_port="training set"/>
<connect from_op="Naive Bayes" from_port="model" to_port="model"/>
<portSpacing port="source_training set" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true">
<operator activated="true" class="apply_model" compatibility="9.7.002" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<operator activated="true" class="performance_binominal_classification" compatibility="9.7.002" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
<parameter key="manually_set_positive_class" value="false"/>
<parameter key="main_criterion" value="first"/>
<parameter key="accuracy" value="true"/>
<parameter key="classification_error" value="false"/>
<parameter key="kappa" value="true"/>
<parameter key="AUC (optimistic)" value="false"/>
<parameter key="AUC" value="false"/>
<parameter key="AUC (pessimistic)" value="false"/>
<parameter key="precision" value="true"/>
<parameter key="recall" value="true"/>
<parameter key="lift" value="false"/>
<parameter key="fallout" value="false"/>
<parameter key="f_measure" value="true"/>
<parameter key="false_positive" value="false"/>
<parameter key="false_negative" value="false"/>
<parameter key="true_positive" value="false"/>
<parameter key="true_negative" value="false"/>
<parameter key="sensitivity" value="false"/>
<parameter key="specificity" value="false"/>
<parameter key="youden" value="false"/>
<parameter key="positive_predictive_value" value="false"/>
<parameter key="negative_predictive_value" value="false"/>
<parameter key="psep" value="false"/>
<parameter key="skip_undefined_labels" value="true"/>
<parameter key="use_example_weights" value="true"/>
</operator>
<connect from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
<connect from_op="Performance" from_port="performance" to_port="performance 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_test set results" spacing="0"/>
<portSpacing port="sink_performance 1" spacing="0"/>
<portSpacing port="sink_performance 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="store" compatibility="9.7.002" expanded="true" height="68" name="Store (5)" width="90" x="1452" y="493">
<parameter key="repository_entry" value="../Model/NBC_Performance"/>
</operator>
<operator activated="true" class="store" compatibility="9.7.002" expanded="true" height="68" name="Store (4)" width="90" x="1385" y="340">
<parameter key="repository_entry" value="../Model/NBC"/>
</operator>
<operator activated="true" class="apply_model" compatibility="9.7.002" expanded="true" height="82" name="Apply Model (2)" width="90" x="1653" y="442">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<operator activated="true" class="performance_binominal_classification" compatibility="9.7.002" expanded="true" height="82" name="Performance (2)" width="90" x="1854" y="442">
<parameter key="manually_set_positive_class" value="false"/>
<parameter key="main_criterion" value="first"/>
<parameter key="accuracy" value="true"/>
<parameter key="classification_error" value="false"/>
<parameter key="kappa" value="true"/>
<parameter key="AUC (optimistic)" value="false"/>
<parameter key="AUC" value="false"/>
<parameter key="AUC (pessimistic)" value="false"/>
<parameter key="precision" value="true"/>
<parameter key="recall" value="true"/>
<parameter key="lift" value="false"/>
<parameter key="fallout" value="false"/>
<parameter key="f_measure" value="true"/>
<parameter key="false_positive" value="false"/>
<parameter key="false_negative" value="false"/>
<parameter key="true_positive" value="false"/>
<parameter key="true_negative" value="false"/>
<parameter key="sensitivity" value="false"/>
<parameter key="specificity" value="false"/>
<parameter key="youden" value="false"/>
<parameter key="positive_predictive_value" value="false"/>
<parameter key="negative_predictive_value" value="false"/>
<parameter key="psep" value="false"/>
<parameter key="skip_undefined_labels" value="true"/>
<parameter key="use_example_weights" value="true"/>
</operator>
<operator activated="true" class="store" compatibility="9.7.002" expanded="true" height="68" name="Store (6)" width="90" x="2055" y="442">
<parameter key="repository_entry" value="../Model/NBC_Unseen_Performance"/>
</operator>
<connect from_op="Read Excel" from_port="output" to_op="Split Data" to_port="example set"/>
<connect from_op="Split Data" from_port="partition 1" to_op="Nominal to Text" to_port="example set input"/>
<connect from_op="Split Data" from_port="partition 2" to_op="Store (3)" to_port="input"/>
<connect from_op="Store (3)" from_port="through" to_op="Nominal to Text (2)" to_port="example set input"/>
<connect from_op="Nominal to Text (2)" from_port="example set output" to_op="Process Documents from Data (2)" to_port="example set"/>
<connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
<connect from_op="Process Documents from Data" from_port="example set" to_op="Cross Validation" to_port="example set"/>
<connect from_op="Process Documents from Data" from_port="word list" to_op="Process Documents from Data (2)" to_port="word list"/>
<connect from_op="Process Documents from Data (2)" from_port="example set" to_op="Apply Model (2)" to_port="unlabelled data"/>
<connect from_op="Cross Validation" from_port="model" to_op="Store (4)" to_port="input"/>
<connect from_op="Cross Validation" from_port="performance 1" to_op="Store (5)" to_port="input"/>
<connect from_op="Store (4)" from_port="through" to_op="Apply Model (2)" to_port="model"/>
<connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
<connect from_op="Performance (2)" from_port="performance" to_op="Store (6)" to_port="input"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
</process>
All comments
MartinLiebig
Hi,
you ran into the "WordList Issue" outlined here:
https://community.rapidminer.com/discussion/50278/text-mining-and-the-word-list
Attached is a process which passes the word list through and should work.
<?xml version="1.0" encoding="UTF-8"?><process version="9.7.002">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="9.7.002" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<operator activated="true" class="read_excel" compatibility="9.7.002" expanded="true" height="68" name="Read Excel" width="90" x="581" y="187">
<parameter key="excel_file" value="C:UsersMYATDocuments1.0 Master StudyLearningData Sciencesubjectivity_Dataset_Validated.xlsx"/>
<parameter key="sheet_selection" value="sheet number"/>
<parameter key="sheet_number" value="1"/>
<parameter key="imported_cell_range" value="A1:F10485776"/>
<parameter key="encoding" value="SYSTEM"/>
<parameter key="first_row_as_names" value="true"/>
<list key="annotations"/>
<parameter key="date_format" value=""/>
<parameter key="time_zone" value="SYSTEM"/>
<parameter key="locale" value="English (United States)"/>
<parameter key="read_all_values_as_polynominal" value="false"/>
<list key="data_set_meta_data_information">
<parameter key="0" value="customer_review.true.polynominal.attribute"/>
<parameter key="1" value="confident_score.true.binominal.label"/>
<parameter key="2" value="Airlines.true.polynominal.attribute"/>
<parameter key="3" value="TB_label.false.polynominal.attribute"/>
<parameter key="4" value="TB_Opinion.false.polynominal.attribute"/>
<parameter key="5" value="Vader_Score.false.polynominal.attribute"/>
</list>
<parameter key="read_not_matching_values_as_missings" value="false"/>
<parameter key="datamanagement" value="double_array"/>
<parameter key="data_management" value="auto"/>
</operator>
<operator activated="true" class="split_data" compatibility="9.7.002" expanded="true" height="103" name="Split Data" width="90" x="715" y="187">
<enumeration key="partitions">
<parameter key="ratio" value="0.75"/>
<parameter key="ratio" value="0.25"/>
</enumeration>
<parameter key="sampling_type" value="stratified sampling"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
</operator>
<operator activated="true" class="store" compatibility="9.7.002" expanded="true" height="68" name="Store (3)" width="90" x="916" y="187">
<parameter key="repository_entry" value="../Data/unseen_data"/>
</operator>
<operator activated="true" class="nominal_to_text" compatibility="9.7.002" expanded="true" height="82" name="Nominal to Text (2)" width="90" x="1117" y="85">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="customer_review"/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="nominal"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="file_path"/>
<parameter key="block_type" value="single_value"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="single_value"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<operator activated="true" class="nominal_to_text" compatibility="9.7.002" expanded="true" height="82" name="Nominal to Text" width="90" x="246" y="493">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="customer_review"/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="nominal"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="file_path"/>
<parameter key="block_type" value="single_value"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="single_value"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<operator activated="true" class="text:process_document_from_data" compatibility="9.3.001" expanded="true" height="82" name="Process Documents from Data" width="90" x="849" y="442">
<parameter key="create_word_vector" value="true"/>
<parameter key="vector_creation" value="TF-IDF"/>
<parameter key="add_meta_information" value="true"/>
<parameter key="keep_text" value="false"/>
<parameter key="prune_method" value="none"/>
<parameter key="prune_below_percent" value="3.0"/>
<parameter key="prune_above_percent" value="30.0"/>
<parameter key="prune_below_rank" value="0.05"/>
<parameter key="prune_above_rank" value="0.95"/>
<parameter key="datamanagement" value="double_sparse_array"/>
<parameter key="data_management" value="auto"/>
<parameter key="select_attributes_and_weights" value="false"/>
<list key="specify_weights"/>
<process expanded="true">
<operator activated="true" class="text:tokenize" compatibility="9.3.001" expanded="true" height="68" name="Tokenize" width="90" x="45" y="34">
<parameter key="mode" value="non letters"/>
<parameter key="characters" value=".:"/>
<parameter key="language" value="English"/>
<parameter key="max_token_length" value="3"/>
</operator>
<operator activated="true" class="text:transform_cases" compatibility="9.3.001" expanded="true" height="68" name="Transform Cases" width="90" x="179" y="34">
<parameter key="transform_to" value="lower case"/>
</operator>
<operator activated="true" class="text:filter_stopwords_english" compatibility="9.3.001" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="313" y="34"/>
<operator activated="true" class="text:stem_porter" compatibility="9.3.001" expanded="true" height="68" name="Stem (Porter)" width="90" x="447" y="34"/>
<operator activated="true" class="text:filter_by_length" compatibility="9.3.001" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="581" y="34">
<parameter key="min_chars" value="3"/>
<parameter key="max_chars" value="25"/>
</operator>
<connect from_port="document" to_op="Tokenize" to_port="document"/>
<connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
<connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
<connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (Porter)" to_port="document"/>
<connect from_op="Stem (Porter)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
<connect from_op="Filter Tokens (by Length)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="text:process_document_from_data" compatibility="9.3.001" expanded="true" height="82" name="Process Documents from Data (2)" width="90" x="1318" y="187">
<parameter key="create_word_vector" value="true"/>
<parameter key="vector_creation" value="TF-IDF"/>
<parameter key="add_meta_information" value="true"/>
<parameter key="keep_text" value="false"/>
<parameter key="prune_method" value="none"/>
<parameter key="prune_below_percent" value="3.0"/>
<parameter key="prune_above_percent" value="30.0"/>
<parameter key="prune_below_rank" value="0.05"/>
<parameter key="prune_above_rank" value="0.95"/>
<parameter key="datamanagement" value="double_sparse_array"/>
<parameter key="data_management" value="auto"/>
<parameter key="select_attributes_and_weights" value="false"/>
<list key="specify_weights"/>
<process expanded="true">
<operator activated="true" class="text:tokenize" compatibility="9.3.001" expanded="true" height="68" name="Tokenize (2)" width="90" x="45" y="34">
<parameter key="mode" value="non letters"/>
<parameter key="characters" value=".:"/>
<parameter key="language" value="English"/>
<parameter key="max_token_length" value="3"/>
</operator>
<operator activated="true" class="text:transform_cases" compatibility="9.3.001" expanded="true" height="68" name="Transform Cases (2)" width="90" x="179" y="34">
<parameter key="transform_to" value="lower case"/>
</operator>
<operator activated="true" class="text:filter_stopwords_english" compatibility="9.3.001" expanded="true" height="68" name="Filter Stopwords (English) (2)" width="90" x="313" y="34"/>
<operator activated="true" class="text:stem_porter" compatibility="9.3.001" expanded="true" height="68" name="Stem (Porter) (2)" width="90" x="447" y="34"/>
<operator activated="true" class="text:filter_by_length" compatibility="9.3.001" expanded="true" height="68" name="Filter Tokens (by Length) (2)" width="90" x="581" y="34">
<parameter key="min_chars" value="3"/>
<parameter key="max_chars" value="25"/>
</operator>
<connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
<connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
<connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Stopwords (English) (2)" to_port="document"/>
<connect from_op="Filter Stopwords (English) (2)" from_port="document" to_op="Stem (Porter) (2)" to_port="document"/>
<connect from_op="Stem (Porter) (2)" from_port="document" to_op="Filter Tokens (by Length) (2)" to_port="document"/>
<connect from_op="Filter Tokens (by Length) (2)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="concurrency:cross_validation" compatibility="9.7.002" expanded="true" height="145" name="Cross Validation" width="90" x="1050" y="442">
<parameter key="split_on_batch_attribute" value="false"/>
<parameter key="leave_one_out" value="false"/>
<parameter key="number_of_folds" value="10"/>
<parameter key="sampling_type" value="automatic"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
<parameter key="enable_parallel_execution" value="true"/>
<process expanded="true">
<operator activated="true" class="naive_bayes" compatibility="9.7.002" expanded="true" height="82" name="Naive Bayes" width="90" x="45" y="34">
<parameter key="laplace_correction" value="true"/>
</operator>
<connect from_port="training set" to_op="Naive Bayes" to_port="training set"/>
<connect from_op="Naive Bayes" from_port="model" to_port="model"/>
<portSpacing port="source_training set" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true">
<operator activated="true" class="apply_model" compatibility="9.7.002" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<operator activated="true" class="performance_binominal_classification" compatibility="9.7.002" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
<parameter key="manually_set_positive_class" value="false"/>
<parameter key="main_criterion" value="first"/>
<parameter key="accuracy" value="true"/>
<parameter key="classification_error" value="false"/>
<parameter key="kappa" value="true"/>
<parameter key="AUC (optimistic)" value="false"/>
<parameter key="AUC" value="false"/>
<parameter key="AUC (pessimistic)" value="false"/>
<parameter key="precision" value="true"/>
<parameter key="recall" value="true"/>
<parameter key="lift" value="false"/>
<parameter key="fallout" value="false"/>
<parameter key="f_measure" value="true"/>
<parameter key="false_positive" value="false"/>
<parameter key="false_negative" value="false"/>
<parameter key="true_positive" value="false"/>
<parameter key="true_negative" value="false"/>
<parameter key="sensitivity" value="false"/>
<parameter key="specificity" value="false"/>
<parameter key="youden" value="false"/>
<parameter key="positive_predictive_value" value="false"/>
<parameter key="negative_predictive_value" value="false"/>
<parameter key="psep" value="false"/>
<parameter key="skip_undefined_labels" value="true"/>
<parameter key="use_example_weights" value="true"/>
</operator>
<connect from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
<connect from_op="Performance" from_port="performance" to_port="performance 1"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_test set results" spacing="0"/>
<portSpacing port="sink_performance 1" spacing="0"/>
<portSpacing port="sink_performance 2" spacing="0"/>
</process>
</operator>
<operator activated="true" class="store" compatibility="9.7.002" expanded="true" height="68" name="Store (5)" width="90" x="1452" y="493">
<parameter key="repository_entry" value="../Model/NBC_Performance"/>
</operator>
<operator activated="true" class="store" compatibility="9.7.002" expanded="true" height="68" name="Store (4)" width="90" x="1385" y="340">
<parameter key="repository_entry" value="../Model/NBC"/>
</operator>
<operator activated="true" class="apply_model" compatibility="9.7.002" expanded="true" height="82" name="Apply Model (2)" width="90" x="1653" y="442">
<list key="application_parameters"/>
<parameter key="create_view" value="false"/>
</operator>
<operator activated="true" class="performance_binominal_classification" compatibility="9.7.002" expanded="true" height="82" name="Performance (2)" width="90" x="1854" y="442">
<parameter key="manually_set_positive_class" value="false"/>
<parameter key="main_criterion" value="first"/>
<parameter key="accuracy" value="true"/>
<parameter key="classification_error" value="false"/>
<parameter key="kappa" value="true"/>
<parameter key="AUC (optimistic)" value="false"/>
<parameter key="AUC" value="false"/>
<parameter key="AUC (pessimistic)" value="false"/>
<parameter key="precision" value="true"/>
<parameter key="recall" value="true"/>
<parameter key="lift" value="false"/>
<parameter key="fallout" value="false"/>
<parameter key="f_measure" value="true"/>
<parameter key="false_positive" value="false"/>
<parameter key="false_negative" value="false"/>
<parameter key="true_positive" value="false"/>
<parameter key="true_negative" value="false"/>
<parameter key="sensitivity" value="false"/>
<parameter key="specificity" value="false"/>
<parameter key="youden" value="false"/>
<parameter key="positive_predictive_value" value="false"/>
<parameter key="negative_predictive_value" value="false"/>
<parameter key="psep" value="false"/>
<parameter key="skip_undefined_labels" value="true"/>
<parameter key="use_example_weights" value="true"/>
</operator>
<operator activated="true" class="store" compatibility="9.7.002" expanded="true" height="68" name="Store (6)" width="90" x="2055" y="442">
<parameter key="repository_entry" value="../Model/NBC_Unseen_Performance"/>
</operator>
<connect from_op="Read Excel" from_port="output" to_op="Split Data" to_port="example set"/>
<connect from_op="Split Data" from_port="partition 1" to_op="Nominal to Text" to_port="example set input"/>
<connect from_op="Split Data" from_port="partition 2" to_op="Store (3)" to_port="input"/>
<connect from_op="Store (3)" from_port="through" to_op="Nominal to Text (2)" to_port="example set input"/>
<connect from_op="Nominal to Text (2)" from_port="example set output" to_op="Process Documents from Data (2)" to_port="example set"/>
<connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
<connect from_op="Process Documents from Data" from_port="example set" to_op="Cross Validation" to_port="example set"/>
<connect from_op="Process Documents from Data" from_port="word list" to_op="Process Documents from Data (2)" to_port="word list"/>
<connect from_op="Process Documents from Data (2)" from_port="example set" to_op="Apply Model (2)" to_port="unlabelled data"/>
<connect from_op="Cross Validation" from_port="model" to_op="Store (4)" to_port="input"/>
<connect from_op="Cross Validation" from_port="performance 1" to_op="Store (5)" to_port="input"/>
<connect from_op="Store (4)" from_port="through" to_op="Apply Model (2)" to_port="model"/>
<connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance (2)" to_port="labelled data"/>
<connect from_op="Performance (2)" from_port="performance" to_op="Store (6)" to_port="input"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
</process>
HeikoeWin786
Hello
@mschmitz
Much appreciated for your kind clarification here.
Indeed, this is what I was missing.
Well noted!
Much thanks.
Heikoe
Quick Links
All Categories
Recent Discussions
Activity
Unanswered
日本語 (Japanese)
한국어(Korean)
Groups