Hi,
I think this bug is related to the bug mentioned in this post
http://rapid-i.com/rapidforum/index.php/topic,776.msg2897.html#new. Unfortunately, I couldn't understand the exact problem and the suggested workaround did not work for me.
The process file shown below, which includes both training and scoring, works without problems: my documents are classified correctly. However, when I run a process file that includes only the scoring part (the operators after the MemoryCleanUp), the documents are mislabeled.
Can anyone suggest a workaround?
thanks,
Matthew
<!--
  Patent text classification process: trains a linear LibSVM model on
  labelled patent documents, then scores uncategorized documents.

  Part 1 (training): every operator up to and including ModelWriter.
  Part 2 (scoring):  every operator after MemoryCleanUp.

  Both parts must preprocess text identically; otherwise the attributes
  produced at scoring time do not match the ones the model was trained on.

  NOTE(review): if Part 2 is run as a stand-alone process, the word list built
  by the training StringTextInput is presumably no longer available, so the
  scoring word vectors may be built over a different vocabulary. Confirm
  whether StringTextInput can save the word list during training and load it
  during scoring, and wire that up before splitting the process.
-->
<operator name="Root" class="Process" expanded="yes">

  <!-- Part 1: training -->

  <!-- Reads the training documents from one directory per entry in "texts" and
       extracts the English title and the abstract paragraphs via XPath.
       Presumably each key is used as the class label; verify.
       NOTE(review): the key "Amplifer" looks like a misspelling of "Amplifier";
       it is left unchanged because renaming it would change the label that
       appears in the stored model and in predictions. -->
  <operator name="FeatureExtraction" class="FeatureExtraction">
    <list key="texts">
      <parameter key="ADC" value="../01 Data/Model Patents/ADC"/>
      <parameter key="DAC" value="../01 Data/Model Patents/DAC"/>
      <parameter key="Supply" value="../01 Data/Model Patents/Supply"/>
      <parameter key="ESD" value="../01 Data/Model Patents/ESD"/>
      <parameter key="IO" value="../01 Data/Model Patents/IO"/>
      <parameter key="Non_Volatile" value="../01 Data/Model Patents/Flash"/>
      <parameter key="PLL" value="../01 Data/Model Patents/PLL"/>
      <parameter key="DLL" value="../01 Data/Model Patents/DLL"/>
      <parameter key="Process" value="../01 Data/Model Patents/Process"/>
      <parameter key="Package" value="../01 Data/Model Patents/Package"/>
      <parameter key="Amplifer" value="../01 Data/Model Patents/Amplifier"/>
      <parameter key="MEMS" value="../01 Data/Model Patents/MEMS"/>
      <parameter key="Optoelectronics" value="../01 Data/Model Patents/Optoelectronics"/>
    </list>
    <parameter key="id_attribute_type" value="short"/>
    <list key="attributes">
      <parameter key="XTitle" value="//x:title[@language='en']/text()"/>
      <parameter key="XAbstract" value="//x:abstract/x:paragraph/text()"/>
    </list>
    <list key="namespaces">
      <parameter key="x" value="http://schemas.delphion.com/20031014/ippublication"/>
    </list>
  </operator>

  <operator name="Nominal2String" class="Nominal2String"/>

  <!-- Text preprocessing chain for training: tokenize, drop English stopwords,
       keep tokens of 2 to 15 characters, then apply Porter stemming. The
       original text attributes are removed afterwards. -->
  <operator name="StringTextInput" class="StringTextInput" expanded="no">
    <parameter key="remove_original_attributes" value="true"/>
    <parameter key="id_attribute_type" value="short"/>
    <list key="namespaces"/>
    <operator name="StringTokenizer" class="StringTokenizer"/>
    <operator name="EnglishStopwordFilter" class="EnglishStopwordFilter"/>
    <operator name="TokenLengthFilter" class="TokenLengthFilter">
      <parameter key="min_chars" value="2"/>
      <parameter key="max_chars" value="15"/>
    </operator>
    <operator name="PorterStemmer" class="PorterStemmer"/>
  </operator>

  <!-- Feature selection: keep the top 500 attributes by SVM weight and store
       the resulting attribute weights in a file for the scoring part. -->
  <operator name="SVMWeighting" class="SVMWeighting"/>
  <operator name="AttributeWeightSelection" class="AttributeWeightSelection">
    <parameter key="weight_relation" value="top k"/>
    <parameter key="k" value="500"/>
  </operator>
  <operator name="ExampleSet2AttributeWeights" class="ExampleSet2AttributeWeights"/>
  <operator name="AttributeWeightsWriter" class="AttributeWeightsWriter">
    <parameter key="attribute_weights_file" value="%{process_name}_AttrWeight.wgt"/>
  </operator>

  <!-- Linear-kernel LibSVM with confidence calculation; the model is written
       to a file in XML format. -->
  <operator name="LibSVMLearner" class="LibSVMLearner">
    <parameter key="kernel_type" value="linear"/>
    <list key="class_weights"/>
    <parameter key="calculate_confidences" value="true"/>
  </operator>
  <operator name="ModelWriter" class="ModelWriter">
    <parameter key="model_file" value="%{process_name}_Model.mod"/>
    <parameter key="output_type" value="XML"/>
  </operator>

  <operator name="MemoryCleanUp" class="MemoryCleanUp"/>

  <!-- Part 2: scoring -->

  <!-- Reads the documents to classify, using the same XPath expressions and
       namespace mapping as the training extraction. -->
  <operator name="FeatureExtraction (2)" class="FeatureExtraction">
    <list key="texts">
      <parameter key="Uncategorized" value="../01 Data/Test Patents"/>
    </list>
    <parameter key="id_attribute_type" value="short"/>
    <list key="attributes">
      <parameter key="XTitle" value="//x:title[@language='en']/text()"/>
      <parameter key="XAbstract" value="//x:abstract/x:paragraph/text()"/>
    </list>
    <list key="namespaces">
      <parameter key="x" value="http://schemas.delphion.com/20031014/ippublication"/>
    </list>
  </operator>

  <operator name="Nominal2String (2)" class="Nominal2String"/>

  <!-- Same preprocessing chain as in training. max_chars is 15 here to match
       the training TokenLengthFilter; it was previously 13, which discarded
       14 and 15 character tokens that the model had been trained on.
       NOTE(review): unlike the training operator, remove_original_attributes
       is not set here; the two ChangeAttributeRole operators below appear to
       handle the original text attributes instead. Confirm this is intended. -->
  <operator name="StringTextInput (2)" class="StringTextInput" expanded="no">
    <parameter key="id_attribute_type" value="short"/>
    <list key="namespaces"/>
    <operator name="StringTokenizer (2)" class="StringTokenizer"/>
    <operator name="EnglishStopwordFilter (2)" class="EnglishStopwordFilter"/>
    <operator name="TokenLengthFilter (2)" class="TokenLengthFilter">
      <parameter key="min_chars" value="2"/>
      <parameter key="max_chars" value="15"/>
    </operator>
    <operator name="PorterStemmer (2)" class="PorterStemmer"/>
  </operator>

  <!-- Assign the XTitle and XAbstract attributes roles of the same name;
       presumably this keeps them out of the regular attributes seen by the
       model while retaining them in the result. Verify. -->
  <operator name="Title" class="ChangeAttributeRole">
    <parameter key="name" value="XTitle"/>
    <parameter key="target_role" value="XTitle"/>
  </operator>
  <operator name="Abstract" class="ChangeAttributeRole">
    <parameter key="name" value="XAbstract"/>
    <parameter key="target_role" value="XAbstract"/>
  </operator>

  <!-- Load and apply the attribute weights stored by the training part, then
       load the stored model and apply it. -->
  <operator name="AttributeWeightsLoader (2)" class="AttributeWeightsLoader">
    <parameter key="attribute_weights_file" value="%{process_name}_AttrWeight.wgt"/>
  </operator>
  <operator name="AttributeWeightsApplier (2)" class="AttributeWeightsApplier"/>
  <operator name="ModelLoader" class="ModelLoader">
    <parameter key="model_file" value="%{process_name}_Model.mod"/>
  </operator>
  <operator name="ModelApplier" class="ModelApplier">
    <list key="application_parameters"/>
  </operator>
</operator>