Find more posts tagged with
Sort by:
1 - 21 of
211
Hi @ rjones13,
Thank you for your response.
Review1&2 is for the first "Process Documents from Files" operator, and Test file is the test data that I've used for the second "Process Documents from Files" operator.
Thank you for your response.
Review1&2 is for the first "Process Documents from Files" operator, and Test file is the test data that I've used for the second "Process Documents from Files" operator.
Hi @Qween,
I've had a look, and realised the slight issue with your process. When you're loading in your test data, you've assigned the class to be "text". So when it comes to scoring, it thinks there's a third class called text whereas your model has been trained to predict positive or negative. You'll just need to adjust your process to account for this and either split your test data or run some further processing. I've attached my attempt below.
Best,
Roland
I've had a look, and realised the slight issue with your process. When you're loading in your test data, you've assigned the class to be "text". So when it comes to scoring, it thinks there's a third class called text whereas your model has been trained to predict positive or negative. You'll just need to adjust your process to account for this and either split your test data or run some further processing. I've attached my attempt below.
Best,
Roland
<?xml version="1.0" encoding="UTF-8"?><process version="10.3.001">
<!--
  Sentiment-classification workflow (RapidMiner process; uses the Text
  Processing and Operator Toolbox extensions):
    1. Build TF-IDF word vectors from a positive and a negative directory of
       training reviews.
    2. Append both training sets, zero-fill missing term weights, declare the
       label binominal, and train an SVM with a linear (dot) kernel.
    3. Vectorise the positive/negative TEST directories against the SAME word
       lists produced in training, append, zero-fill, score with the trained
       model, and report binominal classification performance.
  NOTE(review): the keys of each "text_directories" list ("positive" /
  "negative") appear to become the class labels, so they must match between
  the training and test operators - mismatched keys seemingly caused the 0%
  accuracy discussed in the surrounding thread; confirm against the Text
  Processing extension documentation.
-->
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="10.3.001" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="UTF-8"/>
<process expanded="true">
<!-- Training data, positive class: every file under pos_reviews becomes one
     TF-IDF-vectorised example (prune_method "none", raw text not kept). -->
<operator activated="true" class="text:process_document_from_file" compatibility="10.0.000" expanded="true" height="82" name="Process Documents from Files" width="90" x="45" y="34">
<list key="text_directories">
<parameter key="positive" value="C:/Users/rjones/Downloads/txt_sentoken/pos_reviews"/>
</list>
<parameter key="file_pattern" value="*"/>
<parameter key="extract_text_only" value="true"/>
<parameter key="use_file_extension_as_type" value="true"/>
<parameter key="content_type" value="txt"/>
<parameter key="encoding" value="UTF-8"/>
<parameter key="create_word_vector" value="true"/>
<parameter key="vector_creation" value="TF-IDF"/>
<parameter key="add_meta_information" value="true"/>
<parameter key="keep_text" value="false"/>
<parameter key="prune_method" value="none"/>
<parameter key="prune_below_percent" value="3.0"/>
<parameter key="prune_above_percent" value="30.0"/>
<parameter key="prune_below_absolute" value="1"/>
<parameter key="prune_below_rank" value="0.05"/>
<parameter key="prune_above_rank" value="0.95"/>
<parameter key="datamanagement" value="double_sparse_array"/>
<parameter key="data_management" value="auto"/>
<process expanded="true">
<!-- Per-document preprocessing: split on non-letter characters, lowercase,
     remove English stopwords, then Porter-stem. -->
<operator activated="true" class="text:tokenize" compatibility="10.0.000" expanded="true" height="68" name="Tokenize" width="90" x="45" y="34">
<parameter key="mode" value="non letters"/>
<parameter key="characters" value=".:"/>
<parameter key="language" value="English"/>
<parameter key="max_token_length" value="3"/>
</operator>
<operator activated="true" class="text:transform_cases" compatibility="10.0.000" expanded="true" height="68" name="Transform Cases" width="90" x="179" y="34">
<parameter key="transform_to" value="lower case"/>
</operator>
<operator activated="true" class="text:filter_stopwords_english" compatibility="10.0.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="313" y="34"/>
<operator activated="true" class="text:stem_porter" compatibility="10.0.000" expanded="true" height="68" name="Stem (Porter)" width="90" x="447" y="34"/>
<connect from_port="document" to_op="Tokenize" to_port="document"/>
<connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
<connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
<connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (Porter)" to_port="document"/>
<connect from_op="Stem (Porter)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<!-- Training data, negative class: same vectorisation and preprocessing as
     above, reading from neg_reviews. -->
<operator activated="true" class="text:process_document_from_file" compatibility="10.0.000" expanded="true" height="82" name="Process Documents from Files (2)" width="90" x="45" y="136">
<list key="text_directories">
<parameter key="negative" value="C:/Users/rjones/Downloads/txt_sentoken/neg_reviews"/>
</list>
<parameter key="file_pattern" value="*"/>
<parameter key="extract_text_only" value="true"/>
<parameter key="use_file_extension_as_type" value="true"/>
<parameter key="content_type" value="txt"/>
<parameter key="encoding" value="UTF-8"/>
<parameter key="create_word_vector" value="true"/>
<parameter key="vector_creation" value="TF-IDF"/>
<parameter key="add_meta_information" value="true"/>
<parameter key="keep_text" value="false"/>
<parameter key="prune_method" value="none"/>
<parameter key="prune_below_percent" value="3.0"/>
<parameter key="prune_above_percent" value="30.0"/>
<parameter key="prune_below_absolute" value="1"/>
<parameter key="prune_below_rank" value="0.05"/>
<parameter key="prune_above_rank" value="0.95"/>
<parameter key="datamanagement" value="double_sparse_array"/>
<parameter key="data_management" value="auto"/>
<process expanded="true">
<!-- Same preprocessing chain as the positive-class operator. -->
<operator activated="true" class="text:tokenize" compatibility="10.0.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="45" y="34">
<parameter key="mode" value="non letters"/>
<parameter key="characters" value=".:"/>
<parameter key="language" value="English"/>
<parameter key="max_token_length" value="3"/>
</operator>
<operator activated="true" class="text:transform_cases" compatibility="10.0.000" expanded="true" height="68" name="Transform Cases (2)" width="90" x="179" y="34">
<parameter key="transform_to" value="lower case"/>
</operator>
<operator activated="true" class="text:filter_stopwords_english" compatibility="10.0.000" expanded="true" height="68" name="Filter Stopwords (English) (2)" width="90" x="313" y="34"/>
<operator activated="true" class="text:stem_porter" compatibility="10.0.000" expanded="true" height="68" name="Stem (Porter) (2)" width="90" x="447" y="34"/>
<connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
<connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
<connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Stopwords (English) (2)" to_port="document"/>
<connect from_op="Filter Stopwords (English) (2)" from_port="document" to_op="Stem (Porter) (2)" to_port="document"/>
<connect from_op="Stem (Porter) (2)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<!-- Merge the positive and negative training examples (Operator Toolbox
     superset append). -->
<operator activated="true" class="operator_toolbox:advanced_append" compatibility="2.17.000" expanded="true" height="103" name="Append (Superset)" width="90" x="246" y="34"/>
<!-- Terms absent from one class but present in the other leave missing
     values after the append; replace them with zero across all attributes. -->
<operator activated="true" class="replace_missing_values" compatibility="10.3.001" expanded="true" height="103" name="Replace Missing Values" width="90" x="380" y="34">
<parameter key="return_preprocessing_model" value="false"/>
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="default" value="zero"/>
<list key="columns"/>
</operator>
<!-- Declare the special "label" attribute binominal (two classes) before
     training. -->
<operator activated="true" class="nominal_to_binominal" compatibility="10.3.001" expanded="true" height="103" name="Nominal to Binominal" width="90" x="514" y="34">
<parameter key="return_preprocessing_model" value="false"/>
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="label"/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="nominal"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="file_path"/>
<parameter key="block_type" value="single_value"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="single_value"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="true"/>
<parameter key="transform_binominal" value="false"/>
<parameter key="use_underscore_in_name" value="false"/>
</operator>
<!-- Linear (dot) kernel SVM trained on the merged TF-IDF vectors.
     NOTE(review): C is set to 0.0 - confirm this is the intended complexity
     setting for this operator rather than a leftover default. -->
<operator activated="true" class="support_vector_machine" compatibility="10.3.001" expanded="true" height="124" name="SVM" width="90" x="715" y="34">
<parameter key="kernel_type" value="dot"/>
<parameter key="kernel_gamma" value="1.0"/>
<parameter key="kernel_sigma1" value="1.0"/>
<parameter key="kernel_sigma2" value="0.0"/>
<parameter key="kernel_sigma3" value="2.0"/>
<parameter key="kernel_shift" value="1.0"/>
<parameter key="kernel_degree" value="2.0"/>
<parameter key="kernel_a" value="1.0"/>
<parameter key="kernel_b" value="0.0"/>
<parameter key="kernel_cache" value="200"/>
<parameter key="C" value="0.0"/>
<parameter key="convergence_epsilon" value="0.001"/>
<parameter key="max_iterations" value="100000"/>
<parameter key="scale" value="true"/>
<parameter key="calculate_weights" value="true"/>
<parameter key="return_optimization_performance" value="true"/>
<parameter key="L_pos" value="1.0"/>
<parameter key="L_neg" value="1.0"/>
<parameter key="epsilon" value="0.0"/>
<parameter key="epsilon_plus" value="0.0"/>
<parameter key="epsilon_minus" value="0.0"/>
<parameter key="balance_cost" value="false"/>
<parameter key="quadratic_loss_pos" value="false"/>
<parameter key="quadratic_loss_neg" value="false"/>
<parameter key="estimate_performance" value="false"/>
</operator>
<!-- Test data, positive class: vectorised against the word list produced by
     the positive TRAINING operator (see the connection wiring below), so the
     test examples share the training feature space. -->
<operator activated="true" class="text:process_document_from_file" compatibility="10.0.000" expanded="true" height="82" name="Process Documents from Files (3)" width="90" x="447" y="289">
<list key="text_directories">
<parameter key="positive" value="C:/Users/rjones/Downloads/txt_sentoken/test_pos"/>
</list>
<parameter key="file_pattern" value="*"/>
<parameter key="extract_text_only" value="true"/>
<parameter key="use_file_extension_as_type" value="true"/>
<parameter key="content_type" value="txt"/>
<parameter key="encoding" value="UTF-8"/>
<parameter key="create_word_vector" value="true"/>
<parameter key="vector_creation" value="TF-IDF"/>
<parameter key="add_meta_information" value="true"/>
<parameter key="keep_text" value="false"/>
<parameter key="prune_method" value="none"/>
<parameter key="prune_below_percent" value="3.0"/>
<parameter key="prune_above_percent" value="30.0"/>
<parameter key="prune_below_absolute" value="1"/>
<parameter key="prune_below_rank" value="0.05"/>
<parameter key="prune_above_rank" value="0.95"/>
<parameter key="datamanagement" value="double_sparse_array"/>
<parameter key="data_management" value="auto"/>
<process expanded="true">
<!-- Preprocessing must mirror training exactly; same chain as above. -->
<operator activated="true" class="text:tokenize" compatibility="10.0.000" expanded="true" height="68" name="Tokenize (3)" width="90" x="45" y="34">
<parameter key="mode" value="non letters"/>
<parameter key="characters" value=".:"/>
<parameter key="language" value="English"/>
<parameter key="max_token_length" value="3"/>
</operator>
<operator activated="true" class="text:transform_cases" compatibility="10.0.000" expanded="true" height="68" name="Transform Cases (3)" width="90" x="179" y="34">
<parameter key="transform_to" value="lower case"/>
</operator>
<operator activated="true" class="text:filter_stopwords_english" compatibility="10.0.000" expanded="true" height="68" name="Filter Stopwords (English) (3)" width="90" x="313" y="34"/>
<operator activated="true" class="text:stem_porter" compatibility="10.0.000" expanded="true" height="68" name="Stem (Porter) (3)" width="90" x="447" y="34"/>
<connect from_port="document" to_op="Tokenize (3)" to_port="document"/>
<connect from_op="Tokenize (3)" from_port="document" to_op="Transform Cases (3)" to_port="document"/>
<connect from_op="Transform Cases (3)" from_port="document" to_op="Filter Stopwords (English) (3)" to_port="document"/>
<connect from_op="Filter Stopwords (English) (3)" from_port="document" to_op="Stem (Porter) (3)" to_port="document"/>
<connect from_op="Stem (Porter) (3)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<!-- Test data, negative class: vectorised against the negative training
     word list. -->
<operator activated="true" class="text:process_document_from_file" compatibility="10.0.000" expanded="true" height="82" name="Process Documents from Files (4)" width="90" x="447" y="391">
<list key="text_directories">
<parameter key="negative" value="C:/Users/rjones/Downloads/txt_sentoken/test_neg"/>
</list>
<parameter key="file_pattern" value="*"/>
<parameter key="extract_text_only" value="true"/>
<parameter key="use_file_extension_as_type" value="true"/>
<parameter key="content_type" value="txt"/>
<parameter key="encoding" value="UTF-8"/>
<parameter key="create_word_vector" value="true"/>
<parameter key="vector_creation" value="TF-IDF"/>
<parameter key="add_meta_information" value="true"/>
<parameter key="keep_text" value="false"/>
<parameter key="prune_method" value="none"/>
<parameter key="prune_below_percent" value="3.0"/>
<parameter key="prune_above_percent" value="30.0"/>
<parameter key="prune_below_absolute" value="1"/>
<parameter key="prune_below_rank" value="0.05"/>
<parameter key="prune_above_rank" value="0.95"/>
<parameter key="datamanagement" value="double_sparse_array"/>
<parameter key="data_management" value="auto"/>
<process expanded="true">
<!-- Same preprocessing chain as the other three document operators. -->
<operator activated="true" class="text:tokenize" compatibility="10.0.000" expanded="true" height="68" name="Tokenize (4)" width="90" x="45" y="34">
<parameter key="mode" value="non letters"/>
<parameter key="characters" value=".:"/>
<parameter key="language" value="English"/>
<parameter key="max_token_length" value="3"/>
</operator>
<operator activated="true" class="text:transform_cases" compatibility="10.0.000" expanded="true" height="68" name="Transform Cases (4)" width="90" x="179" y="34">
<parameter key="transform_to" value="lower case"/>
</operator>
<operator activated="true" class="text:filter_stopwords_english" compatibility="10.0.000" expanded="true" height="68" name="Filter Stopwords (English) (4)" width="90" x="313" y="34"/>
<operator activated="true" class="text:stem_porter" compatibility="10.0.000" expanded="true" height="68" name="Stem (Porter) (4)" width="90" x="447" y="34"/>
<connect from_port="document" to_op="Tokenize (4)" to_port="document"/>
<connect from_op="Tokenize (4)" from_port="document" to_op="Transform Cases (4)" to_port="document"/>
<connect from_op="Transform Cases (4)" from_port="document" to_op="Filter Stopwords (English) (4)" to_port="document"/>
<connect from_op="Filter Stopwords (English) (4)" from_port="document" to_op="Stem (Porter) (4)" to_port="document"/>
<connect from_op="Stem (Porter) (4)" from_port="document" to_port="document 1"/>
<portSpacing port="source_document" spacing="0"/>
<portSpacing port="sink_document 1" spacing="0"/>
<portSpacing port="sink_document 2" spacing="0"/>
</process>
</operator>
<!-- Merge positive + negative TEST examples, then zero-fill missings, same
     as on the training branch. -->
<operator activated="true" class="operator_toolbox:advanced_append" compatibility="2.17.000" expanded="true" height="103" name="Append (Superset) (2)" width="90" x="581" y="289"/>
<operator activated="true" class="replace_missing_values" compatibility="10.3.001" expanded="true" height="103" name="Replace Missing Values (2)" width="90" x="715" y="289">
<parameter key="return_preprocessing_model" value="false"/>
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="default" value="zero"/>
<list key="columns"/>
</operator>
<!-- Score the merged test set with the trained SVM model. -->
<operator activated="true" class="apply_model" compatibility="10.3.001" expanded="true" height="82" name="Apply Model" width="90" x="782" y="187">
<list key="application_parameters"/>
</operator>
<!-- Report accuracy, classification error and AUC on the scored test set. -->
<operator activated="true" class="performance_binominal_classification" compatibility="10.3.001" expanded="true" height="82" name="Performance" width="90" x="916" y="187">
<parameter key="manually_set_positive_class" value="false"/>
<parameter key="main_criterion" value="first"/>
<parameter key="accuracy" value="true"/>
<parameter key="classification_error" value="true"/>
<parameter key="kappa" value="false"/>
<parameter key="AUC (optimistic)" value="false"/>
<parameter key="AUC" value="true"/>
<parameter key="AUC (pessimistic)" value="false"/>
<parameter key="precision" value="false"/>
<parameter key="recall" value="false"/>
<parameter key="lift" value="false"/>
<parameter key="fallout" value="false"/>
<parameter key="f_measure" value="false"/>
<parameter key="false_positive" value="false"/>
<parameter key="false_negative" value="false"/>
<parameter key="true_positive" value="false"/>
<parameter key="true_negative" value="false"/>
<parameter key="sensitivity" value="false"/>
<parameter key="specificity" value="false"/>
<parameter key="youden" value="false"/>
<parameter key="positive_predictive_value" value="false"/>
<parameter key="negative_predictive_value" value="false"/>
<parameter key="psep" value="false"/>
<parameter key="skip_undefined_labels" value="true"/>
<parameter key="use_example_weights" value="true"/>
</operator>
<!-- Wiring: note the training operators' word-list outputs feed the test
     vectorisers ((3)/(4)) so training and test share one feature space. -->
<connect from_op="Process Documents from Files" from_port="example set" to_op="Append (Superset)" to_port="example set 1"/>
<connect from_op="Process Documents from Files" from_port="word list" to_op="Process Documents from Files (3)" to_port="word list"/>
<connect from_op="Process Documents from Files (2)" from_port="example set" to_op="Append (Superset)" to_port="example set 2"/>
<connect from_op="Process Documents from Files (2)" from_port="word list" to_op="Process Documents from Files (4)" to_port="word list"/>
<connect from_op="Append (Superset)" from_port="merged set" to_op="Replace Missing Values" to_port="example set input"/>
<connect from_op="Replace Missing Values" from_port="example set output" to_op="Nominal to Binominal" to_port="example set input"/>
<connect from_op="Nominal to Binominal" from_port="example set output" to_op="SVM" to_port="training set"/>
<connect from_op="SVM" from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_op="Process Documents from Files (3)" from_port="example set" to_op="Append (Superset) (2)" to_port="example set 1"/>
<connect from_op="Process Documents from Files (4)" from_port="example set" to_op="Append (Superset) (2)" to_port="example set 2"/>
<connect from_op="Append (Superset) (2)" from_port="merged set" to_op="Replace Missing Values (2)" to_port="example set input"/>
<connect from_op="Replace Missing Values (2)" from_port="example set output" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
<connect from_op="Performance" from_port="performance" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Hi @ rjones13 ,
Thank you very much for your explanation.
I've tried the process multiple times, changing some of the parameters, but unfortunately it did not work. It gave me a note saying that the SVM input needs to be labeled. Moreover, it was also looking for the testing data, which is the file with the name 'test'.
Thank you very much for your explanation.
I've tried the process multiple times, changing some of the parameters, but unfortunately it did not work. It gave me a note saying that the SVM input needs to be labeled. Moreover, it was also looking for the testing data, which is the file with the name 'test'.
Hi @Qween,
Could you try importing the attached process? I'd split up the test between positive and negative, as shown in the screenshot below

Let me know if this now works.
Best,
Roland
Could you try importing the attached process? I'd split up the test between positive and negative, as shown in the screenshot below

Let me know if this now works.
Best,
Roland
Thank you for your response. Unfortunately, it didn't work. At the beginning, it gave me a pop-up message about the dummy operator (please see the first attachment). Then, when I tried to take the dummy operators out of the process, another message from the SVM appeared (please see the second attachment). I've tried other approaches as well, but I failed to get it to run, and I'm not sure why it didn't work when it works for you.
Hi @Qween,
Unfortunately I don't see your attachment, but I think the process I'd build used operators from the Operator Toolbox extension. Please could you try installing this extension and then importing again?
Best,
Roland
Unfortunately I don't see your attachment, but I think the process I'd build used operators from the Operator Toolbox extension. Please could you try installing this extension and then importing again?
Best,
Roland
Hi @ rjones13,
I'm sorry, I forgot to attach them. However, I did install the Operator Toolbox extension and I still get the same result.
Regards,
I'm sorry, I forgot to attach them. However, I did install the Operator Toolbox extension and I still get the same result.
Regards,
Hi @Qween,
Could you add a breakpoint after "Nominal to Binominal" and run the process again? Please share a screenshot of the result
Thanks,
Roland
Could you add a breakpoint after "Nominal to Binominal" and run the process again? Please share a screenshot of the result
Thanks,
Roland
Hi @ rjones13,
I've tried to add the breakpoint after "Nominal to Binominal" but I got the same
Regards,
I've tried to add the breakpoint after "Nominal to Binominal" but I got the same
Regards,
Hi @Qween,
Was there any data present when you reached the breakpoint - my reason for asking to try that was so we could see if the problem was missing data.
Could you confirm on the 4 "Process Documents from Files" operators that you've changed them to the appropriate file locations on your machine?
Best,
Roland
Was there any data present when you reached the breakpoint - my reason for asking to try that was so we could see if the problem was missing data.
Could you confirm on the 4 "Process Documents from Files" operators that you've changed them to the appropriate file locations on your machine?
Best,
Roland
Hi @ rjones13,
Oh yes, I realized that the software was pulling the data from your machine. However, I've fixed it, and I got the attached results (Accuracy = 0.00%), which doesn't make sense, as I calculated it manually and it is supposed to be

Regards,
Oh yes, I realized that the software was pulling the data from your machine. However, I've fixed it, and I got the attached results (Accuracy = 0.00%), which doesn't make sense, as I calculated it manually and it is supposed to be

Regards,
Hi @Qween,
I can see you've assigned the classes "test_neg" and "test_pos" to your testing datasets. Please could you change this to "neg_reviews" and "pos_reviews". With this workflow, they are treated as different classes hence the supposed 0% accuracy - it requires you to be consistent with the class labels when testing. Hopefully with this fix you see the same 100% accuracy I did!
Best,
Roland
I can see you've assigned the classes "test_neg" and "test_pos" to your testing datasets. Please could you change this to "neg_reviews" and "pos_reviews". With this workflow, they are treated as different classes hence the supposed 0% accuracy - it requires you to be consistent with the class labels when testing. Hopefully with this fix you see the same 100% accuracy I did!
Best,
Roland
Hi @ rjones13,
Sorry for keeping you busy. I've just changed it. However, below is a screenshot of what I got as a result. It is still not matching the manual one, which is
Regards,
Sorry for keeping you busy. I've just changed it. However, below is a screenshot of what I got as a result. It is still not matching the manual one, which is
Regards,
Hi @Qween,
This surprises me slightly that we've ended up with different results. Just checking you didn't adjust the model at all? As a manual check could you look at the scored data coming out of the Performance operator:

And then share the results so we can see which files vary:

This surprises me slightly that we've ended up with different results. Just checking you didn't adjust the model at all? As a manual check could you look at the scored data coming out of the Performance operator:

And then share the results so we can see which files vary:

Hi @ rjones13,
This is a bit weird. Yes, I got different results. Although the process and the files, I believe are the same. Below are the results I got from the model.


Regards,
This is a bit weird. Yes, I got different results. Although the process and the files, I believe are the same. Below are the results I got from the model.


Regards,
Hi @ rjones13,
How about sharing the process again with all files corresponding to each operator, to confirm that I haven't messed up somewhere? Would that be possible?
Thank you for your help & support
How about sharing the process again with all files corresponding to each operator, to confirm that I haven't messed up somewhere? Would that be possible?
Thank you for your help & support
Hi @Qween,
I agree it's odd, as from your screenshot it looks like 100% accuracy. I've attached a zip file here with the process and the files as I've organised them. All you should need to do is just change the file paths for the 4 "Process Documents from Files" operators to match your system.
Best,
Roland
I agree it's odd, as from your screenshot it looks like 100% accuracy. I've attached a zip file here with the process and the files as I've organised them. All you should need to do is just change the file paths for the 4 "Process Documents from Files" operators to match your system.
Best,
Roland
Hi @ rjones13,
Thank you very much. It worked. However, the resulting accuracy is 100%, but when I calculated it manually, it is supposed to be 70%. I'm not sure if there is still something that needs to be changed, like the parameters or anything else?


Regards,
Thank you very much. It worked. However, the resulting accuracy is 100%, but when I calculated it manually, it is supposed to be 70%. I'm not sure if there is still something that needs to be changed, like the parameters or anything else?


Regards,
Hi @Qween,
Could you possibly explain to me what you mean by "manually calculated"? Just to help from my understanding what's going wrong.
The Replace Missing Values is to account for the fact that different datasets have different columns due to different content. Rather than allow missing values, instead we replace these with zeros which is more representative of what the data is showing. The Nominal to Binominal is to declare that the label is either positive or negative - technically it's not needed but it's good practice to declare it.
Best,
Roland
Could you possibly explain to me what you mean by "manually calculated"? Just to help from my understanding what's going wrong.
The Replace Missing Values is to account for the fact that different datasets have different columns due to different content. Rather than allow missing values, instead we replace these with zeros which is more representative of what the data is showing. The Nominal to Binominal is to declare that the label is either positive or negative - technically it's not needed but it's good practice to declare it.
Best,
Roland
Sort by:
1 - 1 of
11
Hi @Qween,
Could you possibly explain to me what you mean by "manually calculated"? Just to help from my understanding what's going wrong.
The Replace Missing Values is to account for the fact that different datasets have different columns due to different content. Rather than allow missing values, instead we replace these with zeros which is more representative of what the data is showing. The Nominal to Binominal is to declare that the label is either positive or negative - technically it's not needed but it's good practice to declare it.
Best,
Roland
Could you possibly explain to me what you mean by "manually calculated"? Just to help from my understanding what's going wrong.
The Replace Missing Values is to account for the fact that different datasets have different columns due to different content. Rather than allow missing values, instead we replace these with zeros which is more representative of what the data is showing. The Nominal to Binominal is to declare that the label is either positive or negative - technically it's not needed but it's good practice to declare it.
Best,
Roland
Could you post your process and data? Just trying to understand why you've got two Process Documents subprocesses in your process.
Best,
Roland