[SOLVED] input example set has no attribute
Hi,
I am trying to classify some documents using Naive Bayes, first training the algorithm and then testing it. I get an error which says "Input example set has no attributes" and "Learning schemes cannot be applied without at least one attribute", and the offending operator is Naive Bayes (Kernel).
I also see that my example set has the text and the metadata as attributes, but after executing the Process Documents operator the example set shows 5 special attributes and 0 regular attributes. I have no idea why.
I am not able to debug this, and I don't know why I keep getting stuck like this.
This is my XML:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.3.013">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.3.013" expanded="true" name="Process">
<process expanded="true">
<!-- Reads the documents from the listed directories. Each list entry pairs a name
     (low, high, ...) with one directory; presumably that name becomes the label of
     the documents found there (confirm against the operator documentation). -->
<operator activated="true" class="text:process_document_from_file" compatibility="5.3.001" expanded="true" height="76" name="Process Documents from Files" width="90" x="45" y="30">
  <list key="text_directories">
    <parameter key="low" value="C:\Users\Uma\Desktop\nvivo\Low"/>
    <parameter key="high" value="C:\Users\Uma\Desktop\nvivo\High"/>
    <parameter key="verylow" value="C:\Users\Uma\Desktop\nvivo\VeryLow"/>
    <parameter key="veryhigh" value="C:\Users\Uma\Desktop\nvivo\VeryHigh"/>
    <parameter key="mediumhigh" value="C:\Users\Uma\Desktop\nvivo\MediumHigh"/>
    <parameter key="mediumlow" value="C:\Users\Uma\Desktop\nvivo\MediumLow"/>
  </list>
  <parameter key="use_file_extension_as_type" value="false"/>
  <!-- This operator is used for loading only: no word vector is built here and the
       raw text is kept for the downstream operators. -->
  <parameter key="create_word_vector" value="false"/>
  <parameter key="keep_text" value="true"/>
  <!-- Inner process: the document is handed from the source port to the first sink
       port without any processing. -->
  <process expanded="true">
    <connect from_port="document" to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
  </process>
</operator>
<!-- Selects the attribute named "text"; the additional-roles list maps "text" to the
     role "regular". -->
<operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="76" name="Set Role" width="90" x="179" y="30">
  <parameter key="attribute_name" value="text"/>
  <list key="set_additional_roles">
    <parameter key="text" value="regular"/>
  </list>
</operator>
<!-- Runs the text pipeline below on the "text" attribute and turns the resulting
     tokens into the word vector whose columns are the regular attributes the
     learner is trained on. -->
<operator activated="true" class="text:process_document_from_data" compatibility="5.3.001" expanded="true" height="76" name="Process Documents from Data" width="90" x="313" y="30">
  <!-- Must be true. With create_word_vector=false no token attributes are generated,
       the output holds only special attributes, and the learner stops with
       "Input example set has no attributes". -->
  <parameter key="create_word_vector" value="true"/>
  <parameter key="keep_text" value="true"/>
  <parameter key="select_attributes_and_weights" value="true"/>
  <list key="specify_weights">
    <parameter key="text" value="1.0"/>
  </list>
  <!-- Per-document pipeline: Transform Cases, Replace Tokens, Tokenize,
       Filter Stopwords (English), Stem (Porter). -->
  <process expanded="true">
    <operator activated="true" class="text:transform_cases" compatibility="5.3.001" expanded="true" height="60" name="Transform Cases" width="90" x="45" y="30"/>
    <operator activated="true" class="text:replace_tokens" compatibility="5.3.001" expanded="true" height="60" name="Replace Tokens" width="90" x="179" y="30">
      <!-- Each key is a pattern that is replaced by a single space. A literal "<" is
           not allowed inside an XML attribute value, so it is written as an entity;
           the parsed pattern is unchanged. -->
      <list key="replace_dictionary">
        <parameter key="reference.*coverage" value=" "/>
        <parameter key="&lt;internals.*]" value=" "/>
        <parameter key="&lt;page&gt;" value=" "/>
      </list>
    </operator>
    <operator activated="true" class="text:tokenize" compatibility="5.3.001" expanded="true" height="60" name="Tokenize" width="90" x="313" y="30">
      <!-- NOTE(review): this mode presumably yields one token per sentence, so the
           stopword filter and the stemmer below would see whole sentences. If
           word-level features are intended, a word-level mode is likely needed;
           confirm against the Tokenize operator documentation. -->
      <parameter key="mode" value="linguistic sentences"/>
    </operator>
    <operator activated="true" class="text:filter_stopwords_english" compatibility="5.3.001" expanded="true" height="60" name="Filter Stopwords (English)" width="90" x="112" y="165"/>
    <operator activated="true" class="text:stem_porter" compatibility="5.3.001" expanded="true" height="60" name="Stem (Porter)" width="90" x="313" y="165"/>
    <connect from_port="document" to_op="Transform Cases" to_port="document"/>
    <connect from_op="Transform Cases" from_port="document" to_op="Replace Tokens" to_port="document"/>
    <connect from_op="Replace Tokens" from_port="document" to_op="Tokenize" to_port="document"/>
    <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
    <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (Porter)" to_port="document"/>
    <connect from_op="Stem (Porter)" from_port="document" to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
  </process>
</operator>
<!-- Cross-validation wrapper: the first inner process trains the model, the second
     applies it to the test set and measures classification performance. -->
<operator activated="true" class="x_validation" compatibility="5.3.013" expanded="true" height="112" name="Validation" width="90" x="380" y="165">
  <!-- Training side: Naive Bayes (Kernel) learns from the training port and its
       model is delivered on the model port. -->
  <process expanded="true">
    <operator activated="true" class="naive_bayes_kernel" compatibility="5.3.013" expanded="true" height="76" name="Naive Bayes (Kernel)" width="90" x="45" y="30"/>
    <connect from_port="training" to_op="Naive Bayes (Kernel)" to_port="training set"/>
    <connect from_op="Naive Bayes (Kernel)" from_port="model" to_port="model"/>
    <portSpacing port="source_training" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
  </process>
  <!-- Testing side: Apply Model labels the test set, Performance evaluates the
       labelled data and sends its result to the first averagable port. -->
  <process expanded="true">
    <operator activated="true" class="apply_model" compatibility="5.3.013" expanded="true" height="76" name="Apply Model" width="90" x="112" y="30">
      <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="performance_classification" compatibility="5.3.013" expanded="true" height="76" name="Performance" width="90" x="45" y="165">
      <list key="class_weights"/>
    </operator>
    <connect from_port="model" to_op="Apply Model" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
    <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="sink_averagable 1" spacing="0"/>
    <portSpacing port="sink_averagable 2" spacing="0"/>
  </process>
</operator>
<!-- Top-level data flow: Process Documents from Files, then Set Role, then
     Process Documents from Data, then Validation. -->
<connect from_op="Process Documents from Files" from_port="example set" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
<connect from_op="Process Documents from Data" from_port="example set" to_op="Validation" to_port="training"/>
<!-- Process results: result 1 = Validation's training output, result 2 = the model,
     result 3 = the first averagable output of Validation. -->
<connect from_op="Validation" from_port="model" to_port="result 2"/>
<connect from_op="Validation" from_port="training" to_port="result 1"/>
<connect from_op="Validation" from_port="averagable 1" to_port="result 3"/>
<!-- Port layout hints; all spacings are 0. -->
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
</process>
</operator>
</process>
I am trying to classify some documents using Naive Bayes, first training the algorithm and then testing it. I get an error which says "Input example set has no attributes" and "Learning schemes cannot be applied without at least one attribute", and the offending operator is Naive Bayes (Kernel).
I also see that my example set has the text and the metadata as attributes, but after executing the Process Documents operator the example set shows 5 special attributes and 0 regular attributes. I have no idea why.
I am not able to debug this, and I don't know why I keep getting stuck like this.
This is my XML:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.3.013">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.3.013" expanded="true" name="Process">
<process expanded="true">
<!-- Reads the documents from the listed directories. Each list entry pairs a name
     (low, high, ...) with one directory; presumably that name becomes the label of
     the documents found there (confirm against the operator documentation). -->
<operator activated="true" class="text:process_document_from_file" compatibility="5.3.001" expanded="true" height="76" name="Process Documents from Files" width="90" x="45" y="30">
  <list key="text_directories">
    <parameter key="low" value="C:\Users\Uma\Desktop\nvivo\Low"/>
    <parameter key="high" value="C:\Users\Uma\Desktop\nvivo\High"/>
    <parameter key="verylow" value="C:\Users\Uma\Desktop\nvivo\VeryLow"/>
    <parameter key="veryhigh" value="C:\Users\Uma\Desktop\nvivo\VeryHigh"/>
    <parameter key="mediumhigh" value="C:\Users\Uma\Desktop\nvivo\MediumHigh"/>
    <parameter key="mediumlow" value="C:\Users\Uma\Desktop\nvivo\MediumLow"/>
  </list>
  <parameter key="use_file_extension_as_type" value="false"/>
  <!-- This operator is used for loading only: no word vector is built here and the
       raw text is kept for the downstream operators. -->
  <parameter key="create_word_vector" value="false"/>
  <parameter key="keep_text" value="true"/>
  <!-- Inner process: the document is handed from the source port to the first sink
       port without any processing. -->
  <process expanded="true">
    <connect from_port="document" to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
  </process>
</operator>
<!-- Selects the attribute named "text"; the additional-roles list maps "text" to the
     role "regular". -->
<operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="76" name="Set Role" width="90" x="179" y="30">
  <parameter key="attribute_name" value="text"/>
  <list key="set_additional_roles">
    <parameter key="text" value="regular"/>
  </list>
</operator>
<!-- Runs the text pipeline below on the "text" attribute and turns the resulting
     tokens into the word vector whose columns are the regular attributes the
     learner is trained on. -->
<operator activated="true" class="text:process_document_from_data" compatibility="5.3.001" expanded="true" height="76" name="Process Documents from Data" width="90" x="313" y="30">
  <!-- Must be true. With create_word_vector=false no token attributes are generated,
       the output holds only special attributes, and the learner stops with
       "Input example set has no attributes". -->
  <parameter key="create_word_vector" value="true"/>
  <parameter key="keep_text" value="true"/>
  <parameter key="select_attributes_and_weights" value="true"/>
  <list key="specify_weights">
    <parameter key="text" value="1.0"/>
  </list>
  <!-- Per-document pipeline: Transform Cases, Replace Tokens, Tokenize,
       Filter Stopwords (English), Stem (Porter). -->
  <process expanded="true">
    <operator activated="true" class="text:transform_cases" compatibility="5.3.001" expanded="true" height="60" name="Transform Cases" width="90" x="45" y="30"/>
    <operator activated="true" class="text:replace_tokens" compatibility="5.3.001" expanded="true" height="60" name="Replace Tokens" width="90" x="179" y="30">
      <!-- Each key is a pattern that is replaced by a single space. A literal "<" is
           not allowed inside an XML attribute value, so it is written as an entity;
           the parsed pattern is unchanged. -->
      <list key="replace_dictionary">
        <parameter key="reference.*coverage" value=" "/>
        <parameter key="&lt;internals.*]" value=" "/>
        <parameter key="&lt;page&gt;" value=" "/>
      </list>
    </operator>
    <operator activated="true" class="text:tokenize" compatibility="5.3.001" expanded="true" height="60" name="Tokenize" width="90" x="313" y="30">
      <!-- NOTE(review): this mode presumably yields one token per sentence, so the
           stopword filter and the stemmer below would see whole sentences. If
           word-level features are intended, a word-level mode is likely needed;
           confirm against the Tokenize operator documentation. -->
      <parameter key="mode" value="linguistic sentences"/>
    </operator>
    <operator activated="true" class="text:filter_stopwords_english" compatibility="5.3.001" expanded="true" height="60" name="Filter Stopwords (English)" width="90" x="112" y="165"/>
    <operator activated="true" class="text:stem_porter" compatibility="5.3.001" expanded="true" height="60" name="Stem (Porter)" width="90" x="313" y="165"/>
    <connect from_port="document" to_op="Transform Cases" to_port="document"/>
    <connect from_op="Transform Cases" from_port="document" to_op="Replace Tokens" to_port="document"/>
    <connect from_op="Replace Tokens" from_port="document" to_op="Tokenize" to_port="document"/>
    <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
    <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (Porter)" to_port="document"/>
    <connect from_op="Stem (Porter)" from_port="document" to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
  </process>
</operator>
<!-- Cross-validation wrapper: the first inner process trains the model, the second
     applies it to the test set and measures classification performance. -->
<operator activated="true" class="x_validation" compatibility="5.3.013" expanded="true" height="112" name="Validation" width="90" x="380" y="165">
  <!-- Training side: Naive Bayes (Kernel) learns from the training port and its
       model is delivered on the model port. -->
  <process expanded="true">
    <operator activated="true" class="naive_bayes_kernel" compatibility="5.3.013" expanded="true" height="76" name="Naive Bayes (Kernel)" width="90" x="45" y="30"/>
    <connect from_port="training" to_op="Naive Bayes (Kernel)" to_port="training set"/>
    <connect from_op="Naive Bayes (Kernel)" from_port="model" to_port="model"/>
    <portSpacing port="source_training" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
  </process>
  <!-- Testing side: Apply Model labels the test set, Performance evaluates the
       labelled data and sends its result to the first averagable port. -->
  <process expanded="true">
    <operator activated="true" class="apply_model" compatibility="5.3.013" expanded="true" height="76" name="Apply Model" width="90" x="112" y="30">
      <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="performance_classification" compatibility="5.3.013" expanded="true" height="76" name="Performance" width="90" x="45" y="165">
      <list key="class_weights"/>
    </operator>
    <connect from_port="model" to_op="Apply Model" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
    <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="sink_averagable 1" spacing="0"/>
    <portSpacing port="sink_averagable 2" spacing="0"/>
  </process>
</operator>
<!-- Top-level data flow: Process Documents from Files, then Set Role, then
     Process Documents from Data, then Validation. -->
<connect from_op="Process Documents from Files" from_port="example set" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
<connect from_op="Process Documents from Data" from_port="example set" to_op="Validation" to_port="training"/>
<!-- Process results: result 1 = Validation's training output, result 2 = the model,
     result 3 = the first averagable output of Validation. -->
<connect from_op="Validation" from_port="model" to_port="result 2"/>
<connect from_op="Validation" from_port="training" to_port="result 1"/>
<connect from_op="Validation" from_port="averagable 1" to_port="result 3"/>
<!-- Port layout hints; all spacings are 0. -->
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
</process>
</operator>
</process>