The Siemens Community Catalyst program was co-created with our community to acknowledge technology leaders who consistently contribute to the Siemens Community. Nominations are accepted on a rolling basis.
<!-- RapidMiner process "Octopus": reads documents from two directories, derives
     word features from selected text fields, trains a linear LibSVM model and
     writes the model to OctopusModel.mod. -->
<operator name="Root" class="Process" expanded="yes">
  <description text="Octopus"/>

  <!-- Document input. Each "texts" entry pairs a key (news, porn) with a directory;
       presumably the key is used as the class label (confirm against the
       FeatureExtraction operator reference). -->
  <operator name="Extractor" class="FeatureExtraction">
    <list key="texts">
      <parameter key="news" value=".\train\news"/>
      <parameter key="porn" value=".\train\porn"/>
    </list>
    <parameter key="default_content_encoding" value="UTF-8"/>
    <parameter key="default_content_language" value="english"/>
    <!-- Each entry maps a key to the XPath expression that supplies its value.
         The trailing blank in the title expression and the leading '#' on
         redirectCount are reproduced exactly as in the original. -->
    <list key="attributes">
      <parameter key="title" value="//*/title/text() "/>
      <parameter key="#redirectCount" value="//*/redirectCount/text()"/>
      <parameter key="description" value="//*/description/text()"/>
      <parameter key="keywords" value="//*/keywords/text()"/>
      <parameter key="parseText" value="//*/parseText/text()"/>
      <parameter key="metaAbstract" value="//*/metaAbstract/text()"/>
    </list>
    <list key="namespaces"/>
  </operator>

  <!-- Runs Nominal2String only on the attributes whose names match the regex below
       (the five text fields; redirectCount is not in the list). -->
  <operator name="AttributeSubsetPreprocessing" class="AttributeSubsetPreprocessing" expanded="yes">
    <parameter key="condition_class" value="attribute_name_filter"/>
    <parameter key="attribute_name_regex" value="title|description|keywords|parseText|metaAbstract"/>
    <operator name="Nominal2String" class="Nominal2String"/>
  </operator>

  <!-- Text processing chain: tokenize, drop tokens shorter than 3 characters,
       lower-case, remove English stop words, Porter-stem. The word list is written
       to OctopusWordList.txt (relative path). -->
  <operator name="StringTextInput" class="StringTextInput" expanded="yes">
    <parameter key="remove_original_attributes" value="true"/>
    <parameter key="return_word_list" value="true"/>
    <parameter key="output_word_list" value="OctopusWordList.txt"/>
    <list key="namespaces"/>
    <operator name="StringTokenizer" class="StringTokenizer"/>
    <operator name="TokenLengthFilter" class="TokenLengthFilter">
      <parameter key="min_chars" value="3"/>
    </operator>
    <operator name="ToLowerCaseConverter" class="ToLowerCaseConverter"/>
    <operator name="EnglishStopwordFilter" class="EnglishStopwordFilter"/>
    <operator name="PorterStemmer" class="PorterStemmer"/>
  </operator>

  <!-- Linear-kernel LibSVM learner; the example set is kept and confidences are
       calculated. No class weights are configured. -->
  <operator name="LibSVMLearner" class="LibSVMLearner">
    <parameter key="keep_example_set" value="true"/>
    <parameter key="kernel_type" value="linear"/>
    <list key="class_weights"/>
    <parameter key="calculate_confidences" value="true"/>
  </operator>

  <!-- Writes the trained model in binary form. -->
  <operator name="ModelWriter" class="ModelWriter">
    <parameter key="model_file" value="OctopusModel.mod"/>
    <parameter key="output_type" value="Binary"/>
  </operator>
</operator>
<!-- RapidMiner process: same training pipeline as a plain text classifier, but the
     text fields are processed one at a time inside a FeatureIterator so that the
     generated word attributes can be prefixed with the name of the field they
     came from. Ends by training a linear LibSVM model and writing OctopusModel.mod. -->
<operator name="Root" class="Process" expanded="yes">

  <!-- Document input. Each "texts" entry pairs a key (news, porn) with a directory;
       presumably the key is used as the class label (confirm against the
       FeatureExtraction operator reference). -->
  <operator name="Extractor" class="FeatureExtraction">
    <list key="texts">
      <parameter key="news" value=".\train\news"/>
      <parameter key="porn" value=".\train\porn"/>
    </list>
    <parameter key="default_content_encoding" value="UTF-8"/>
    <parameter key="default_content_language" value="english"/>
    <!-- All extracted fields carry the "feature_" prefix; the rename step at the
         end of the loop relies on that prefix to tell extracted fields (and
         already-renamed word attributes) apart from freshly generated ones. -->
    <list key="attributes">
      <parameter key="feature_title" value="//*/title/text() "/>
      <parameter key="#feature_redirectCount" value="//*/redirectCount/text()"/>
      <parameter key="feature_description" value="//*/description/text()"/>
      <parameter key="feature_keywords" value="//*/keywords/text()"/>
      <parameter key="feature_parseText" value="//*/parseText/text()"/>
      <parameter key="feature_metaAbstract" value="//*/metaAbstract/text()"/>
    </list>
    <list key="namespaces"/>
  </operator>

  <!-- Loops over the nominal attributes; the current attribute name is available
       to the inner operators as %{loop_feature}. -->
  <operator name="FeatureIterator" class="FeatureIterator" expanded="yes">
    <parameter key="type_filter" value="nominal"/>

    <!-- Step 1: convert only the current loop attribute from nominal to string. -->
    <operator name="Nominal2String on current attribute only" class="AttributeSubsetPreprocessing" expanded="yes">
      <parameter key="condition_class" value="attribute_name_filter"/>
      <parameter key="attribute_name_regex" value="%{loop_feature}"/>
      <parameter key="deliver_inner_results" value="true"/>
      <operator name="Nominal2String (2)" class="Nominal2String"/>
    </operator>

    <!-- Step 2: tokenize, drop tokens shorter than 3 characters, lower-case,
         remove English stop words, Porter-stem.
         NOTE(review): output_word_list is an absolute, machine-specific path; it is
         left untouched here, but it will only work where that directory exists. -->
    <operator name="StringTextInput" class="StringTextInput" expanded="yes">
      <parameter key="remove_original_attributes" value="true"/>
      <parameter key="return_word_list" value="true"/>
      <parameter key="output_word_list" value="C:\Main\eclipse\workspace\octopus\RapidMiner\OctopusWordList.txt"/>
      <list key="namespaces"/>
      <parameter key="create_text_visualizer" value="true"/>
      <operator name="StringTokenizer" class="StringTokenizer"/>
      <operator name="TokenLengthFilter" class="TokenLengthFilter">
        <parameter key="min_chars" value="3"/>
      </operator>
      <operator name="ToLowerCaseConverter" class="ToLowerCaseConverter"/>
      <operator name="EnglishStopwordFilter" class="EnglishStopwordFilter"/>
      <operator name="PorterStemmer" class="PorterStemmer"/>
    </operator>

    <!-- Step 3: prefix every attribute that does not yet start with "feature_"
         with "<current field>_" (replace_what "^" matches the start of the name).
         The selector uses a negative lookahead. The previous pattern
         ^[^feature_].*$ was a negated character class, which only rejects names
         whose FIRST CHARACTER is one of f, e, a, t, u, r or _, so words such as
         "free" or "area" were never renamed. -->
    <operator name="ChangeAttributeNamesReplace" class="ChangeAttributeNamesReplace">
      <parameter key="attributes" value="^(?!feature_).*$"/>
      <parameter key="replace_what" value="^"/>
      <parameter key="replace_by" value="%{loop_feature}_"/>
      <parameter key="apply_on_special" value="false"/>
    </operator>
  </operator>

  <!-- Linear-kernel LibSVM learner; the example set is kept and confidences are
       calculated. No class weights are configured. -->
  <operator name="LibSVMLearner" class="LibSVMLearner">
    <parameter key="keep_example_set" value="true"/>
    <parameter key="kernel_type" value="linear"/>
    <list key="class_weights"/>
    <parameter key="calculate_confidences" value="true"/>
  </operator>

  <!-- Writes the trained model in binary form. -->
  <operator name="ModelWriter" class="ModelWriter">
    <parameter key="model_file" value="OctopusModel.mod"/>
    <parameter key="output_type" value="Binary"/>
  </operator>
</operator>
<!-- RapidMiner process: generates an example set, then discretizes its attributes
     one at a time inside a FeatureIterator. The working example set is passed
     between iterations through the named store "es". Operator order is
     significant and is kept exactly as in the original. -->
<operator name="Root" class="Process" expanded="yes">

  <!-- Generated input data (target function "sum"). -->
  <operator name="ExampleSetGenerator" class="ExampleSetGenerator">
    <parameter key="target_function" value="sum"/>
  </operator>

  <!-- Store the example set under the name "es" while also leaving it in the
       process (remove_from_process is false), so the iterator below still
       receives it. -->
  <operator name="IOStorer" class="IOStorer">
    <parameter key="name" value="es"/>
    <parameter key="io_object" value="ExampleSet"/>
    <parameter key="remove_from_process" value="false"/>
  </operator>

  <!-- Loop over every attribute whose name matches ".*"; the current name is
       available as %{loop_feature}. -->
  <operator name="FeatureIterator" class="FeatureIterator" expanded="yes">
    <parameter key="filter" value=".*"/>

    <!-- Presumably discards the ExampleSet handed into the loop so that the stored
         copy is the one worked on (confirm against the IOConsumer reference). -->
    <operator name="IOConsumer" class="IOConsumer">
      <parameter key="io_object" value="ExampleSet"/>
    </operator>

    <!-- Fetch the current state of the example set from the store "es". -->
    <operator name="IORetriever" class="IORetriever">
      <parameter key="name" value="es"/>
      <parameter key="io_object" value="ExampleSet"/>
    </operator>

    <!-- Apply BinDiscretization only to the current loop attribute.
         NOTE(review): breakpoints="after" is retained from the original; it may
         be a debugging leftover, so confirm whether it is still wanted. -->
    <operator name="AttributeSubsetPreprocessing" class="AttributeSubsetPreprocessing" breakpoints="after" expanded="yes">
      <parameter key="condition_class" value="attribute_name_filter"/>
      <parameter key="attribute_name_regex" value="%{loop_feature}"/>
      <operator name="BinDiscretization" class="BinDiscretization"/>
    </operator>

    <!-- Put the updated example set back under "es" for the next iteration. -->
    <operator name="IOStorer (2)" class="IOStorer">
      <parameter key="name" value="es"/>
      <parameter key="io_object" value="ExampleSet"/>
    </operator>
  </operator>

  <!-- After the loop: retrieve the example set stored under "es". -->
  <operator name="IORetriever (2)" class="IORetriever">
    <parameter key="name" value="es"/>
    <parameter key="io_object" value="ExampleSet"/>
  </operator>
</operator>