Unexpected score dataset prediction in LIBSVM!
Seyhan
New Altair Community Member
Hi,
I have a problem with LIBSVM classification of an unlabelled score dataset.
The issue is that I get 86% classification accuracy using 10-fold cross-validation (3626 YES and 2295 NO examples of the target attribute), but when I apply the same model to the score dataset, it classifies everything as YES.
I think, but am not sure, that it happens due to the confidence probabilities of the target classes, which never change: confidence(Yes) = 0.643885445 and confidence(No) = 0.356114555 for all observations of the score dataset.
Is there any way in RapidMiner to change the confidences of the score dataset classes, or do I need to add a new process? Please help.
Below is the XML of the processes.
Thanks in advance.
Regards,
Seyhan
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
  <!-- Trains a LibSVM classifier inside a cross-validation on the training CSV,
       then applies the model returned by the validation to a separately
       normalized scoring CSV and writes the prediction attributes to a file. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.0" expanded="true" name="Process">
    <process expanded="true" height="656" width="748">
      <!-- Training data, comma separated. -->
      <operator activated="true" class="read_csv" compatibility="5.0.0" expanded="true" height="60" name="Read CSV" width="90" x="45" y="30">
        <parameter key="file_name" value="C:\StockMarket\TrainingData_clean_Nominal.csv"/>
        <parameter key="column_separators" value=","/>
        <list key="data_set_meta_data_information"/>
      </operator>
      <!-- The filter selects the single attribute TargetVariable and then inverts
           the selection; presumably this normalizes every attribute except
           TargetVariable (confirm against the Normalize operator documentation). -->
      <operator activated="true" class="normalize" compatibility="5.0.0" expanded="true" height="94" name="Normalize" width="90" x="45" y="210">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="TargetVariable"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <!-- Scoring data, comma separated, quotes disabled. -->
      <operator activated="true" class="read_csv" compatibility="5.0.10" expanded="true" height="60" name="Read CSV (2)" width="90" x="45" y="390">
        <parameter key="file_name" value="C:\StockMarket\ResultData_clean.csv"/>
        <parameter key="use_quotes" value="false"/>
        <parameter key="column_separators" value=","/>
        <list key="data_set_meta_data_information"/>
      </operator>
      <!-- Second, independent Normalize with no parameters set; it is fed by
           Read CSV (2) only, so it does not reuse the first Normalize's
           preprocessing model. -->
      <operator activated="true" class="normalize" compatibility="5.0.10" expanded="true" height="94" name="Normalize (2)" width="90" x="203" y="424"/>
      <!-- Cross-validation with shuffled sampling. -->
      <operator activated="true" class="x_validation" compatibility="5.0.0" expanded="true" height="112" name="Validation" width="90" x="313" y="120">
        <parameter key="sampling_type" value="shuffled sampling"/>
        <!-- Training subprocess: mark TargetVariable as label, then learn the SVM. -->
        <process expanded="true" height="610" width="346">
          <operator activated="true" class="set_role" compatibility="5.0.0" expanded="true" height="76" name="Set Role" width="90" x="45" y="75">
            <parameter key="name" value="TargetVariable"/>
            <parameter key="target_role" value="label"/>
          </operator>
          <!-- LibSVM learner: sigmoid kernel, confidence calculation enabled. -->
          <operator activated="true" class="support_vector_machine_libsvm" compatibility="5.0.10" expanded="true" height="76" name="SVM" width="90" x="173" y="196">
            <parameter key="kernel_type" value="sigmoid"/>
            <list key="class_weights"/>
            <parameter key="calculate_confidences" value="true"/>
          </operator>
          <connect from_port="training" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="108"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Testing subprocess: apply the model, set the label role, measure
             performance and write it to a file. -->
        <process expanded="true" height="610" width="302">
          <operator activated="true" class="apply_model" compatibility="5.0.0" expanded="true" height="76" name="Apply Model" width="90" x="112" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="set_role" compatibility="5.0.0" expanded="true" height="76" name="Set Role (2)" width="90" x="179" y="165">
            <parameter key="name" value="TargetVariable"/>
            <parameter key="target_role" value="label"/>
          </operator>
          <operator activated="true" class="performance" compatibility="5.0.0" expanded="true" height="76" name="Performance" width="90" x="112" y="345"/>
          <operator activated="true" class="write_performance" compatibility="5.0.10" expanded="true" height="60" name="Write Performance" width="90" x="185" y="491">
            <parameter key="performance_file" value="C:\StockMarket\SVM_results.csv"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_op="Write Performance" to_port="input"/>
          <connect from_op="Write Performance" from_port="through" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="144"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <!-- Scores the scoring data with the model delivered by Validation. -->
      <operator activated="true" class="apply_model" compatibility="5.0.10" expanded="true" height="76" name="Apply Model (2)" width="90" x="380" y="345">
        <list key="application_parameters"/>
      </operator>
      <!-- Attribute filter using the regular expression pred.* -->
      <operator activated="true" class="select_attributes" compatibility="5.0.10" expanded="true" height="76" name="Select Attributes" width="90" x="447" y="480">
        <parameter key="attribute_filter_type" value="regular_expression"/>
        <parameter key="regular_expression" value="pred.*"/>
      </operator>
      <operator activated="true" class="write_csv" compatibility="5.0.10" expanded="true" height="60" name="Write CSV" width="90" x="581" y="435">
        <parameter key="csv_file" value="C:\StockMarket\SVM_labelled_data.csv"/>
        <parameter key="column_separator" value=","/>
      </operator>
      <!-- Top-level wiring: the training branch feeds Validation; the scoring
           branch goes through Normalize (2) into Apply Model (2). -->
      <connect from_op="Read CSV" from_port="output" to_op="Normalize" to_port="example set input"/>
      <connect from_op="Normalize" from_port="example set output" to_op="Validation" to_port="training"/>
      <connect from_op="Read CSV (2)" from_port="output" to_op="Normalize (2)" to_port="example set input"/>
      <connect from_op="Normalize (2)" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
      <connect from_op="Validation" from_port="model" to_op="Apply Model (2)" to_port="model"/>
      <connect from_op="Validation" from_port="training" to_port="result 1"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
      <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Apply Model (2)" from_port="model" to_port="result 3"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Write CSV" to_port="input"/>
      <connect from_op="Write CSV" from_port="through" to_port="result 4"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
    </process>
  </operator>
</process>
I have a problem with LIBSVM classification of an unlabelled score dataset.
The issue is that I get 86% classification accuracy using 10-fold cross-validation (3626 YES and 2295 NO examples of the target attribute), but when I apply the same model to the score dataset, it classifies everything as YES.
I think, but am not sure, that it happens due to the confidence probabilities of the target classes, which never change: confidence(Yes) = 0.643885445 and confidence(No) = 0.356114555 for all observations of the score dataset.
Is there any way in RapidMiner to change the confidences of the score dataset classes, or do I need to add a new process? Please help.
Below is the XML of the processes.
Thanks in advance.
Regards,
Seyhan
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
  <!-- Trains a LibSVM classifier inside a cross-validation on the training CSV,
       then applies the model returned by the validation to a separately
       normalized scoring CSV and writes the prediction attributes to a file. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.0" expanded="true" name="Process">
    <process expanded="true" height="656" width="748">
      <!-- Training data, comma separated. -->
      <operator activated="true" class="read_csv" compatibility="5.0.0" expanded="true" height="60" name="Read CSV" width="90" x="45" y="30">
        <parameter key="file_name" value="C:\StockMarket\TrainingData_clean_Nominal.csv"/>
        <parameter key="column_separators" value=","/>
        <list key="data_set_meta_data_information"/>
      </operator>
      <!-- The filter selects the single attribute TargetVariable and then inverts
           the selection; presumably this normalizes every attribute except
           TargetVariable (confirm against the Normalize operator documentation). -->
      <operator activated="true" class="normalize" compatibility="5.0.0" expanded="true" height="94" name="Normalize" width="90" x="45" y="210">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="TargetVariable"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <!-- Scoring data, comma separated, quotes disabled. -->
      <operator activated="true" class="read_csv" compatibility="5.0.10" expanded="true" height="60" name="Read CSV (2)" width="90" x="45" y="390">
        <parameter key="file_name" value="C:\StockMarket\ResultData_clean.csv"/>
        <parameter key="use_quotes" value="false"/>
        <parameter key="column_separators" value=","/>
        <list key="data_set_meta_data_information"/>
      </operator>
      <!-- Second, independent Normalize with no parameters set; it is fed by
           Read CSV (2) only, so it does not reuse the first Normalize's
           preprocessing model. -->
      <operator activated="true" class="normalize" compatibility="5.0.10" expanded="true" height="94" name="Normalize (2)" width="90" x="203" y="424"/>
      <!-- Cross-validation with shuffled sampling. -->
      <operator activated="true" class="x_validation" compatibility="5.0.0" expanded="true" height="112" name="Validation" width="90" x="313" y="120">
        <parameter key="sampling_type" value="shuffled sampling"/>
        <!-- Training subprocess: mark TargetVariable as label, then learn the SVM. -->
        <process expanded="true" height="610" width="346">
          <operator activated="true" class="set_role" compatibility="5.0.0" expanded="true" height="76" name="Set Role" width="90" x="45" y="75">
            <parameter key="name" value="TargetVariable"/>
            <parameter key="target_role" value="label"/>
          </operator>
          <!-- LibSVM learner: sigmoid kernel, confidence calculation enabled. -->
          <operator activated="true" class="support_vector_machine_libsvm" compatibility="5.0.10" expanded="true" height="76" name="SVM" width="90" x="173" y="196">
            <parameter key="kernel_type" value="sigmoid"/>
            <list key="class_weights"/>
            <parameter key="calculate_confidences" value="true"/>
          </operator>
          <connect from_port="training" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="108"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Testing subprocess: apply the model, set the label role, measure
             performance and write it to a file. -->
        <process expanded="true" height="610" width="302">
          <operator activated="true" class="apply_model" compatibility="5.0.0" expanded="true" height="76" name="Apply Model" width="90" x="112" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="set_role" compatibility="5.0.0" expanded="true" height="76" name="Set Role (2)" width="90" x="179" y="165">
            <parameter key="name" value="TargetVariable"/>
            <parameter key="target_role" value="label"/>
          </operator>
          <operator activated="true" class="performance" compatibility="5.0.0" expanded="true" height="76" name="Performance" width="90" x="112" y="345"/>
          <operator activated="true" class="write_performance" compatibility="5.0.10" expanded="true" height="60" name="Write Performance" width="90" x="185" y="491">
            <parameter key="performance_file" value="C:\StockMarket\SVM_results.csv"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_op="Write Performance" to_port="input"/>
          <connect from_op="Write Performance" from_port="through" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="144"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <!-- Scores the scoring data with the model delivered by Validation. -->
      <operator activated="true" class="apply_model" compatibility="5.0.10" expanded="true" height="76" name="Apply Model (2)" width="90" x="380" y="345">
        <list key="application_parameters"/>
      </operator>
      <!-- Attribute filter using the regular expression pred.* -->
      <operator activated="true" class="select_attributes" compatibility="5.0.10" expanded="true" height="76" name="Select Attributes" width="90" x="447" y="480">
        <parameter key="attribute_filter_type" value="regular_expression"/>
        <parameter key="regular_expression" value="pred.*"/>
      </operator>
      <operator activated="true" class="write_csv" compatibility="5.0.10" expanded="true" height="60" name="Write CSV" width="90" x="581" y="435">
        <parameter key="csv_file" value="C:\StockMarket\SVM_labelled_data.csv"/>
        <parameter key="column_separator" value=","/>
      </operator>
      <!-- Top-level wiring: the training branch feeds Validation; the scoring
           branch goes through Normalize (2) into Apply Model (2). -->
      <connect from_op="Read CSV" from_port="output" to_op="Normalize" to_port="example set input"/>
      <connect from_op="Normalize" from_port="example set output" to_op="Validation" to_port="training"/>
      <connect from_op="Read CSV (2)" from_port="output" to_op="Normalize (2)" to_port="example set input"/>
      <connect from_op="Normalize (2)" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
      <connect from_op="Validation" from_port="model" to_op="Apply Model (2)" to_port="model"/>
      <connect from_op="Validation" from_port="training" to_port="result 1"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
      <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Apply Model (2)" from_port="model" to_port="result 3"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Write CSV" to_port="input"/>
      <connect from_op="Write CSV" from_port="through" to_port="result 4"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
    </process>
  </operator>
</process>
Tagged:
0
Answers
-
Hi,
Actually, I can't say where the problem causing these predictions is. But it is obvious that you are doing forbidden things in your process.
First of all: there are two Normalize operators, one for the training set and one for the test set. This is very dangerous, because the actual means of the two data sets might differ, and hence the data representation when applying the model will be different from what it was when learning it. So use the preprocessing model of the first Normalize operator and apply it to the test set before applying the SVM itself. This does the trick.
Then you will probably notice that you have normalized just the TargetVariable in your first Normalize operator, so the target variable seems to be a numerical attribute. In that case, getting confidences doesn't make any sense, and neither does using the C-SVC, since it's a CLASSIFIER. Use a regression setting like nu-SVR instead.
Please start by importing your data into your local repository. If you then use this repository entry, you will receive warnings about such misuse in the Problems tab.
Greetings,
Sebastian
0 -
Hi Sebastian,
Thanks for your help.
The target attribute of the dataset is a nominal (Yes, No) class, not numerical. I use normalization for the dataset that goes into the X-Validation operator because, for LIBSVM, the data needs to be discretised, since the means of the dataset variables vary. I did try without normalizing the data, but that did not give me better classification than the normalized data in 10-fold cross-validation.
Also, on the normalization operator, I did invert the target attribute, so I did not include target variable for normalization. Does invert_selection exclude the selected attribute from being normalized?
I removed the normalization on the score dataset and I still have the same misclassification.
I am quite good at SVM but not at RapidMiner ( :-[ ), which is a great tool.
I still think that having the same confidence values for every score observation, while having 86% classification accuracy on training and testing, is a bit odd.
Has anybody experienced a similar issue? I desperately need help.
Thanks again
Regards,
Seyhan
0 -
Hi,
please try executing the following process. If it doesn't work, please copy the output in the log window into your post. I think your data sets differ in the attributes they specify.
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
  <!-- Suggested variant: the preprocessing model produced by Normalize on the
       training data is applied to the scoring data (Apply Model (3)) before
       the SVM model from Validation is applied (Apply Model (2)). -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.0" expanded="true" name="Process">
    <process expanded="true" height="656" width="748">
      <!-- Training data, comma separated. -->
      <operator activated="true" class="read_csv" compatibility="5.0.0" expanded="true" height="60" name="Read CSV" width="90" x="45" y="30">
        <parameter key="file_name" value="C:\StockMarket\TrainingData_clean_Nominal.csv"/>
        <parameter key="column_separators" value=","/>
        <list key="data_set_meta_data_information"/>
      </operator>
      <!-- The filter selects the single attribute TargetVariable and then inverts
           the selection; presumably this normalizes every attribute except
           TargetVariable (confirm against the Normalize operator documentation). -->
      <operator activated="true" class="normalize" compatibility="5.0.0" expanded="true" height="94" name="Normalize" width="90" x="45" y="165">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="TargetVariable"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <!-- Scoring data, comma separated, quotes disabled. -->
      <operator activated="true" class="read_csv" compatibility="5.0.10" expanded="true" height="60" name="Read CSV (2)" width="90" x="45" y="390">
        <parameter key="file_name" value="C:\StockMarket\ResultData_clean.csv"/>
        <parameter key="use_quotes" value="false"/>
        <parameter key="column_separators" value=","/>
        <list key="data_set_meta_data_information"/>
      </operator>
      <!-- Receives the preprocessing model of Normalize and the raw scoring data. -->
      <operator activated="true" class="apply_model" compatibility="5.0.10" expanded="true" height="76" name="Apply Model (3)" width="90" x="246" y="345">
        <list key="application_parameters"/>
      </operator>
      <!-- Cross-validation with shuffled sampling. -->
      <operator activated="true" class="x_validation" compatibility="5.0.0" expanded="true" height="112" name="Validation" width="90" x="246" y="165">
        <parameter key="sampling_type" value="shuffled sampling"/>
        <!-- Training subprocess: mark TargetVariable as label, then learn the SVM. -->
        <process expanded="true" height="610" width="346">
          <operator activated="true" class="set_role" compatibility="5.0.0" expanded="true" height="76" name="Set Role" width="90" x="45" y="75">
            <parameter key="name" value="TargetVariable"/>
            <parameter key="target_role" value="label"/>
          </operator>
          <!-- LibSVM learner: sigmoid kernel, confidence calculation enabled. -->
          <operator activated="true" class="support_vector_machine_libsvm" compatibility="5.0.10" expanded="true" height="76" name="SVM" width="90" x="173" y="196">
            <parameter key="kernel_type" value="sigmoid"/>
            <list key="class_weights"/>
            <parameter key="calculate_confidences" value="true"/>
          </operator>
          <connect from_port="training" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="108"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Testing subprocess: apply the model, set the label role, measure
             performance and write it to a file. -->
        <process expanded="true" height="610" width="302">
          <operator activated="true" class="apply_model" compatibility="5.0.0" expanded="true" height="76" name="Apply Model" width="90" x="112" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="set_role" compatibility="5.0.0" expanded="true" height="76" name="Set Role (2)" width="90" x="179" y="165">
            <parameter key="name" value="TargetVariable"/>
            <parameter key="target_role" value="label"/>
          </operator>
          <operator activated="true" class="performance" compatibility="5.0.0" expanded="true" height="76" name="Performance" width="90" x="112" y="345"/>
          <operator activated="true" class="write_performance" compatibility="5.0.10" expanded="true" height="60" name="Write Performance" width="90" x="185" y="491">
            <parameter key="performance_file" value="C:\StockMarket\SVM_results.csv"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_op="Write Performance" to_port="input"/>
          <connect from_op="Write Performance" from_port="through" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="144"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <!-- Scores the output of Apply Model (3) with the model delivered by Validation. -->
      <operator activated="true" class="apply_model" compatibility="5.0.10" expanded="true" height="76" name="Apply Model (2)" width="90" x="380" y="345">
        <list key="application_parameters"/>
      </operator>
      <!-- Attribute filter using the regular expression pred.* -->
      <operator activated="true" class="select_attributes" compatibility="5.0.10" expanded="true" height="76" name="Select Attributes" width="90" x="514" y="345">
        <parameter key="attribute_filter_type" value="regular_expression"/>
        <parameter key="regular_expression" value="pred.*"/>
      </operator>
      <operator activated="true" class="write_csv" compatibility="5.0.10" expanded="true" height="60" name="Write CSV" width="90" x="648" y="345">
        <parameter key="csv_file" value="C:\StockMarket\SVM_labelled_data.csv"/>
        <parameter key="column_separator" value=","/>
      </operator>
      <!-- Top-level wiring: Normalize feeds both Validation (example set) and
           Apply Model (3) (preprocessing model). -->
      <connect from_op="Read CSV" from_port="output" to_op="Normalize" to_port="example set input"/>
      <connect from_op="Normalize" from_port="example set output" to_op="Validation" to_port="training"/>
      <connect from_op="Normalize" from_port="preprocessing model" to_op="Apply Model (3)" to_port="model"/>
      <connect from_op="Read CSV (2)" from_port="output" to_op="Apply Model (3)" to_port="unlabelled data"/>
      <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Apply Model (2)" to_port="unlabelled data"/>
      <connect from_op="Validation" from_port="model" to_op="Apply Model (2)" to_port="model"/>
      <connect from_op="Validation" from_port="training" to_port="result 1"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
      <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Apply Model (2)" from_port="model" to_port="result 3"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Write CSV" to_port="input"/>
      <connect from_op="Write CSV" from_port="through" to_port="result 4"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
    </process>
  </operator>
</process>
Greetings,
Sebastian
0 -
Hi Sebastian,
Thanks for your response.
I used your process and it still did not work.
Although I get 91% AUC and an 86% accuracy rate in testing, the model still classifies every unlabelled example as one class (Yes) with your process as well.
The confidence values for each observation of the unlabelled data are still the same, so there is no change in the status of the issue.
As you requested, I have attached the log output as well. Below are the details of the log and the XML.
Please let me know if you need more details.
Thanks in advance,
Regards,
Seyhan
Log:
Sep 12, 2010 9:40:25 PM CONFIG: Loading perspectives.
Sep 12, 2010 9:40:27 PM CONFIG: Ignoring update check. Last update check was on Fri Sep 10 22:51:40 EST 2010
Sep 12, 2010 9:40:27 PM INFO: Connecting to: http://www.myexperiment.org/workflows.xml?num=100
Sep 12, 2010 9:40:32 PM INFO: Decoupling process from location //NewLocalRepository/Other_SVM. Process is now associated with file //NewLocalRepository/Other_SVM.
Sep 12, 2010 9:40:53 PM INFO: Saved process definition at '//NewLocalRepository/Other_SVM'.
Sep 12, 2010 9:40:55 PM INFO: No filename given for result file, using stdout for logging results!
Sep 12, 2010 9:40:55 PM INFO: Loading initial data.
Sep 12, 2010 9:40:55 PM INFO: Process //NewLocalRepository/Other_SVM starts
Sep 12, 2010 10:23:31 PM INFO: Saving results.
Sep 12, 2010 10:23:31 PM INFO: Process //NewLocalRepository/Other_SVM finished successfully after 42:36
XML
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
  <!-- Second attempt: the label role is set before Validation (Set Role (3)),
       the scoring data is preprocessed with the Normalize preprocessing model,
       and the complete scored example set is written to CSV. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.0.0" expanded="true" name="Process">
    <process expanded="true" height="656" width="748">
      <!-- Training data, comma separated. -->
      <operator activated="true" class="read_csv" compatibility="5.0.0" expanded="true" height="60" name="Read CSV" width="90" x="45" y="30">
        <parameter key="file_name" value="C:\StockMarket\TrainingData_clean_Nominal.csv"/>
        <parameter key="column_separators" value=","/>
        <list key="data_set_meta_data_information"/>
      </operator>
      <!-- The filter selects the single attribute TargetVariable and then inverts
           the selection; presumably this normalizes every attribute except
           TargetVariable (confirm against the Normalize operator documentation). -->
      <operator activated="true" class="normalize" compatibility="5.0.0" expanded="true" height="94" name="Normalize" width="90" x="45" y="210">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="TargetVariable"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <!-- Marks TargetVariable as label before the data enters Validation. -->
      <operator activated="true" class="set_role" compatibility="5.0.10" expanded="true" height="76" name="Set Role (3)" width="90" x="179" y="165">
        <parameter key="name" value="TargetVariable"/>
        <parameter key="target_role" value="label"/>
      </operator>
      <!-- Scoring data, comma separated, quotes disabled. -->
      <operator activated="true" class="read_csv" compatibility="5.0.10" expanded="true" height="60" name="Read CSV (2)" width="90" x="45" y="390">
        <parameter key="file_name" value="C:\StockMarket\ResultData_clean_no_missing.csv"/>
        <parameter key="use_quotes" value="false"/>
        <parameter key="column_separators" value=","/>
        <list key="data_set_meta_data_information"/>
      </operator>
      <!-- Receives the preprocessing model of Normalize and the raw scoring data. -->
      <operator activated="true" class="apply_model" compatibility="5.0.10" expanded="true" height="76" name="Apply Model (3)" width="90" x="246" y="345">
        <list key="application_parameters"/>
      </operator>
      <!-- Cross-validation with shuffled sampling. -->
      <operator activated="true" class="x_validation" compatibility="5.0.0" expanded="true" height="112" name="Validation" width="90" x="313" y="120">
        <parameter key="sampling_type" value="shuffled sampling"/>
        <!-- Training subprocess: mark TargetVariable as label, then learn the SVM. -->
        <process expanded="true" height="610" width="346">
          <operator activated="true" class="set_role" compatibility="5.0.0" expanded="true" height="76" name="Set Role" width="90" x="45" y="75">
            <parameter key="name" value="TargetVariable"/>
            <parameter key="target_role" value="label"/>
          </operator>
          <!-- LibSVM learner: gamma 2.0, confidence_for_multiclass disabled;
               no kernel_type is given here, so the operator default applies. -->
          <operator activated="true" class="support_vector_machine_libsvm" compatibility="5.0.10" expanded="true" height="76" name="SVM" width="90" x="173" y="196">
            <parameter key="gamma" value="2.0"/>
            <list key="class_weights"/>
            <parameter key="confidence_for_multiclass" value="false"/>
          </operator>
          <connect from_port="training" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="108"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <!-- Testing subprocess: apply the model, set the label role, measure
             performance and write it to a file. -->
        <process expanded="true" height="610" width="302">
          <operator activated="true" class="apply_model" compatibility="5.0.0" expanded="true" height="76" name="Apply Model" width="90" x="112" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="set_role" compatibility="5.0.0" expanded="true" height="76" name="Set Role (2)" width="90" x="179" y="165">
            <parameter key="name" value="TargetVariable"/>
            <parameter key="target_role" value="label"/>
          </operator>
          <operator activated="true" class="performance" compatibility="5.0.0" expanded="true" height="76" name="Performance" width="90" x="112" y="345"/>
          <operator activated="true" class="write_performance" compatibility="5.0.10" expanded="true" height="60" name="Write Performance" width="90" x="185" y="491">
            <parameter key="performance_file" value="C:\StockMarket\SVM_results.csv"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_op="Write Performance" to_port="input"/>
          <connect from_op="Write Performance" from_port="through" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="144"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <!-- Scores the output of Apply Model (3) with the model delivered by Validation. -->
      <operator activated="true" class="apply_model" compatibility="5.0.10" expanded="true" height="76" name="Apply Model (2)" width="90" x="380" y="345">
        <list key="application_parameters"/>
      </operator>
      <!-- Receives the output of Apply Model (2) directly; this variant has no
           Select Attributes step. -->
      <operator activated="true" class="write_csv" compatibility="5.0.10" expanded="true" height="60" name="Write CSV" width="90" x="447" y="30">
        <parameter key="csv_file" value="C:\StockMarket\SVM_labelled_data.csv"/>
        <parameter key="column_separator" value=","/>
      </operator>
      <!-- Top-level wiring: Normalize feeds Set Role (3) and then Validation with
           the example set, and Apply Model (3) with its preprocessing model. -->
      <connect from_op="Read CSV" from_port="output" to_op="Normalize" to_port="example set input"/>
      <connect from_op="Normalize" from_port="example set output" to_op="Set Role (3)" to_port="example set input"/>
      <connect from_op="Normalize" from_port="preprocessing model" to_op="Apply Model (3)" to_port="model"/>
      <connect from_op="Set Role (3)" from_port="example set output" to_op="Validation" to_port="training"/>
      <connect from_op="Read CSV (2)" from_port="output" to_op="Apply Model (3)" to_port="unlabelled data"/>
      <connect from_op="Apply Model (3)" from_port="labelled data" to_op="Apply Model (2)" to_port="unlabelled data"/>
      <connect from_op="Validation" from_port="model" to_op="Apply Model (2)" to_port="model"/>
      <connect from_op="Validation" from_port="training" to_port="result 1"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
      <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Write CSV" to_port="input"/>
      <connect from_op="Apply Model (2)" from_port="model" to_port="result 3"/>
      <connect from_op="Write CSV" from_port="through" to_port="result 4"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
    </process>
  </operator>
</process>
0 -
Hi,
sorry, don't have any idea what might happen there. Would need the data and time to find this problem.
Greetings,
Sebastian
0