"How to get a Word list from select by weights operator?"
hi guys,
I built a model for sentiment analysis, and I used the Weight by Correlation and Select by Weights operators in it.
I trained my model on a dataset of 1950 rows. I want to test it on 3000 rows using Apply Model, with the word list produced after Select by Weights rather than the word list generated by the Process Documents from Data operator.
My question: how can I get a word list from Select by Weights? Is there a macro or script that does that?
My process
<?xml version="1.0" encoding="UTF-8"?>
<!-- Fragment: retrieves the data stored at the repository entry given below. -->
<process version="7.6.001">
  <operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true"
            height="68" name="Retrieve Historical Sentiments" width="90" x="45" y="187">
    <parameter key="repository_entry" value="//Samples/Templates/Sentiment Analysis/Historical Sentiments"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Fragment: gives the attribute "Sentiment" the role "label"; no additional roles are set. -->
<process version="7.6.001">
  <operator activated="true" class="set_role" compatibility="7.6.001" expanded="true"
            height="82" name="Set Role" width="90" x="179" y="187">
    <parameter key="attribute_name" value="Sentiment"/>
    <parameter key="target_role" value="label"/>
    <list key="set_additional_roles"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Fragment: Nominal to Text with the attribute filter set to "all".
  include_special_attributes is false and the selection is not inverted.
-->
<process version="7.6.001">
  <operator activated="true" class="nominal_to_text" compatibility="7.6.001" expanded="true"
            height="82" name="Nominal to Text" width="90" x="313" y="187">
    <parameter key="attribute_filter_type" value="all"/>
    <parameter key="attribute" value=""/>
    <parameter key="attributes" value=""/>
    <parameter key="use_except_expression" value="false"/>
    <parameter key="value_type" value="nominal"/>
    <parameter key="use_value_type_exception" value="false"/>
    <parameter key="except_value_type" value="file_path"/>
    <parameter key="block_type" value="single_value"/>
    <parameter key="use_block_type_exception" value="false"/>
    <parameter key="except_block_type" value="single_value"/>
    <parameter key="invert_selection" value="false"/>
    <parameter key="include_special_attributes" value="false"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Fragment: Process Documents from Data, creating a TF-IDF word vector and keeping the text.
  prune_method is "none"; the prune_* thresholds are presumably unused in that mode (confirm).
-->
<process version="7.6.001">
  <operator activated="true" class="text:process_document_from_data" compatibility="7.5.000" expanded="true"
            height="82" name="Process Documents from Data" width="90" x="447" y="187">
    <parameter key="create_word_vector" value="true"/>
    <parameter key="vector_creation" value="TF-IDF"/>
    <parameter key="add_meta_information" value="true"/>
    <parameter key="keep_text" value="true"/>
    <parameter key="prune_method" value="none"/>
    <parameter key="prune_below_percent" value="3.0"/>
    <parameter key="prune_above_percent" value="30.0"/>
    <parameter key="prune_below_rank" value="0.05"/>
    <parameter key="prune_above_rank" value="0.95"/>
    <parameter key="datamanagement" value="double_sparse_array"/>
    <parameter key="data_management" value="auto"/>
    <parameter key="select_attributes_and_weights" value="false"/>
    <list key="specify_weights"/>
    <!-- Per-document chain: Tokenize, then Transform Cases, then Filter Stopwords (English). -->
    <process expanded="true">
      <operator activated="true" class="text:tokenize" compatibility="7.5.000" expanded="true"
                height="68" name="Tokenize" width="90" x="45" y="30">
        <parameter key="mode" value="non letters"/>
        <parameter key="characters" value=".:"/>
        <parameter key="language" value="English"/>
        <parameter key="max_token_length" value="3"/>
      </operator>
      <operator activated="true" class="text:transform_cases" compatibility="7.5.000" expanded="true"
                height="68" name="Transform Cases" width="90" x="246" y="34">
        <parameter key="transform_to" value="lower case"/>
      </operator>
      <operator activated="true" class="text:filter_stopwords_english" compatibility="7.5.000" expanded="true"
                height="68" name="Filter Stopwords (English)" width="90" x="380" y="30"/>
      <connect from_port="document" to_op="Tokenize" to_port="document"/>
      <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
      <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
      <connect from_op="Filter Stopwords (English)" from_port="document" to_port="document 1"/>
      <portSpacing port="source_document" spacing="0"/>
      <portSpacing port="sink_document 1" spacing="0"/>
      <portSpacing port="sink_document 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Fragment: Weight by Correlation. Weights are not normalized, are sorted in
  ascending direction, and squared correlation is switched off.
-->
<process version="7.6.001">
  <operator activated="true" class="weight_by_correlation" compatibility="7.6.001" expanded="true"
            height="82" name="Weight by Correlation" width="90" x="581" y="187">
    <parameter key="normalize_weights" value="false"/>
    <parameter key="sort_weights" value="true"/>
    <parameter key="sort_direction" value="ascending"/>
    <parameter key="squared_correlation" value="false"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Fragment: Select by Weights with weight_relation "top p%" and p = 0.5, using absolute weights.
  The "weight" and "k" values are presumably ignored for this relation (confirm against the operator docs).
-->
<process version="7.6.001">
  <operator activated="true" class="select_by_weights" compatibility="7.6.001" expanded="true"
            height="103" name="Select by Weights" width="90" x="715" y="187">
    <parameter key="weight_relation" value="top p%"/>
    <parameter key="weight" value="1.0"/>
    <parameter key="k" value="1000"/>
    <parameter key="p" value="0.5"/>
    <parameter key="deselect_unknown" value="true"/>
    <parameter key="use_absolute_weights" value="true"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Fragment: 10-fold Cross Validation. The training subprocess fits an SVM with a dot kernel;
  the testing subprocess applies the model and reports accuracy only.
  NOTE(review): top-level connections are not part of this fragment. If attribute weighting and
  selection run on the full data before this operator, the accuracy estimate may be optimistic;
  confirm the wiring.
-->
<process version="7.6.001">
  <operator activated="true" class="concurrency:cross_validation" compatibility="7.6.001" expanded="true"
            height="145" name="Cross Validation" width="90" x="983" y="187">
    <parameter key="split_on_batch_attribute" value="false"/>
    <parameter key="leave_one_out" value="false"/>
    <parameter key="number_of_folds" value="10"/>
    <parameter key="sampling_type" value="automatic"/>
    <parameter key="use_local_random_seed" value="false"/>
    <parameter key="local_random_seed" value="1992"/>
    <parameter key="enable_parallel_execution" value="true"/>
    <!-- Training subprocess: training set goes into SVM, model goes out. -->
    <process expanded="true">
      <operator activated="true" class="support_vector_machine" compatibility="7.6.001" expanded="true"
                height="124" name="SVM" width="90" x="179" y="34">
        <parameter key="kernel_type" value="dot"/>
        <parameter key="kernel_gamma" value="1.0"/>
        <parameter key="kernel_sigma1" value="1.0"/>
        <parameter key="kernel_sigma2" value="0.0"/>
        <parameter key="kernel_sigma3" value="2.0"/>
        <parameter key="kernel_shift" value="1.0"/>
        <parameter key="kernel_degree" value="2.0"/>
        <parameter key="kernel_a" value="1.0"/>
        <parameter key="kernel_b" value="0.0"/>
        <parameter key="kernel_cache" value="200"/>
        <parameter key="C" value="0.0"/>
        <parameter key="convergence_epsilon" value="0.001"/>
        <parameter key="max_iterations" value="100000"/>
        <parameter key="scale" value="true"/>
        <parameter key="calculate_weights" value="true"/>
        <parameter key="return_optimization_performance" value="true"/>
        <parameter key="L_pos" value="1.0"/>
        <parameter key="L_neg" value="1.0"/>
        <parameter key="epsilon" value="0.0"/>
        <parameter key="epsilon_plus" value="0.0"/>
        <parameter key="epsilon_minus" value="0.0"/>
        <parameter key="balance_cost" value="false"/>
        <parameter key="quadratic_loss_pos" value="false"/>
        <parameter key="quadratic_loss_neg" value="false"/>
        <parameter key="estimate_performance" value="false"/>
      </operator>
      <connect from_port="training set" to_op="SVM" to_port="training set"/>
      <connect from_op="SVM" from_port="model" to_port="model"/>
      <portSpacing port="source_training set" spacing="0"/>
      <portSpacing port="sink_model" spacing="0"/>
      <portSpacing port="sink_through 1" spacing="0"/>
    </process>
    <!-- Testing subprocess: Apply Model on the test set, then Performance. -->
    <process expanded="true">
      <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true"
                height="82" name="Apply Model" width="90" x="112" y="34">
        <list key="application_parameters"/>
        <parameter key="create_view" value="false"/>
      </operator>
      <operator activated="true" class="performance_binominal_classification" compatibility="7.6.001" expanded="true"
                height="82" name="Performance" width="90" x="313" y="34">
        <parameter key="main_criterion" value="first"/>
        <parameter key="accuracy" value="true"/>
        <parameter key="classification_error" value="false"/>
        <parameter key="kappa" value="false"/>
        <parameter key="AUC (optimistic)" value="false"/>
        <parameter key="AUC" value="false"/>
        <parameter key="AUC (pessimistic)" value="false"/>
        <parameter key="precision" value="false"/>
        <parameter key="recall" value="false"/>
        <parameter key="lift" value="false"/>
        <parameter key="fallout" value="false"/>
        <parameter key="f_measure" value="false"/>
        <parameter key="false_positive" value="false"/>
        <parameter key="false_negative" value="false"/>
        <parameter key="true_positive" value="false"/>
        <parameter key="true_negative" value="false"/>
        <parameter key="sensitivity" value="false"/>
        <parameter key="specificity" value="false"/>
        <parameter key="youden" value="false"/>
        <parameter key="positive_predictive_value" value="false"/>
        <parameter key="negative_predictive_value" value="false"/>
        <parameter key="psep" value="false"/>
        <parameter key="skip_undefined_labels" value="true"/>
        <parameter key="use_example_weights" value="true"/>
      </operator>
      <connect from_port="model" to_op="Apply Model" to_port="model"/>
      <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
      <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
      <portSpacing port="source_model" spacing="0"/>
      <portSpacing port="source_test set" spacing="0"/>
      <portSpacing port="source_through 1" spacing="0"/>
      <portSpacing port="sink_test set results" spacing="0"/>
      <portSpacing port="sink_performance 1" spacing="0"/>
      <portSpacing port="sink_performance 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Fragment: Create Document holding one review text; "add label" is false. -->
<process version="7.6.001">
  <operator activated="true" class="text:create_document" compatibility="7.5.000" expanded="true"
            height="68" name="Create Document" width="90" x="313" y="442">
    <!-- The text value is kept on a single line so the attribute content stays exactly as written. -->
    <parameter key="text" value="Watching the movie , i vowed to subtract half a star from the review because the filmmakers included a saccharine syrup , cute 'n cuddly, computer-generated monkey. If the monkey died, the movie got an extra half star. Alas, the monkey showed up at the end , unharmed , to the wet sickly sound of gagging and rolling eyeballs . what that means is that lost in space actually deserved 2 stars . that's pretty generous , considering the movie's camp lacked any hint of tongue in cheek . when will robinson ( jack johnson ) teaches his pet robot about friendship , you are actually supposed to buy it . so why such a seemingly high rating? there are a few reasons that made me unable to throw away the experience with the garbage . Nobody in the country enjoyed the movie as much as my audience did ( and i don't necessarily mean that in a good way ) . This is overlaid on a cool jumpy credit sequence with interesting distorted clips from the movie . but if that's the best part of the movie , i can't in good conscience recommend it."/>
    <parameter key="add label" value="false"/>
    <parameter key="label_type" value="nominal"/>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Fragment: Process Documents, creating a TF-IDF word vector with prune_method "none".
  NOTE(review): the input connections are not part of this fragment. For scoring new text, the
  word list input presumably has to come from the training-side text processing so that the
  attributes match the trained model; confirm the wiring.
-->
<process version="7.6.001">
  <operator activated="true" class="text:process_documents" compatibility="7.5.000" expanded="true"
            height="103" name="Process Documents" width="90" x="581" y="442">
    <parameter key="create_word_vector" value="true"/>
    <parameter key="vector_creation" value="TF-IDF"/>
    <parameter key="add_meta_information" value="true"/>
    <parameter key="keep_text" value="true"/>
    <parameter key="prune_method" value="none"/>
    <parameter key="prune_below_percent" value="3.0"/>
    <parameter key="prune_above_percent" value="30.0"/>
    <parameter key="prune_below_rank" value="0.05"/>
    <parameter key="prune_above_rank" value="0.95"/>
    <parameter key="datamanagement" value="double_sparse_array"/>
    <parameter key="data_management" value="auto"/>
    <!-- Per-document chain: Tokenize (2), then Transform Cases (2), then Filter Stopwords (2). -->
    <process expanded="true">
      <operator activated="true" class="text:tokenize" compatibility="7.5.000" expanded="true"
                height="68" name="Tokenize (2)" width="90" x="112" y="34">
        <parameter key="mode" value="non letters"/>
        <parameter key="characters" value=".:"/>
        <parameter key="language" value="English"/>
        <parameter key="max_token_length" value="3"/>
      </operator>
      <operator activated="true" class="text:transform_cases" compatibility="7.5.000" expanded="true"
                height="68" name="Transform Cases (2)" width="90" x="246" y="34">
        <parameter key="transform_to" value="lower case"/>
      </operator>
      <operator activated="true" class="text:filter_stopwords_english" compatibility="7.5.000" expanded="true"
                height="68" name="Filter Stopwords (2)" width="90" x="380" y="30"/>
      <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
      <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
      <connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Stopwords (2)" to_port="document"/>
      <connect from_op="Filter Stopwords (2)" from_port="document" to_port="document 1"/>
      <portSpacing port="source_document" spacing="0"/>
      <portSpacing port="sink_document 1" spacing="0"/>
      <portSpacing port="sink_document 2" spacing="0"/>
    </process>
  </operator>
</process>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Fragment: second Apply Model operator, with no application parameters and create_view off. -->
<process version="7.6.001">
  <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true"
            height="82" name="Apply Model (2)" width="90" x="1050" y="442">
    <list key="application_parameters"/>
    <parameter key="create_view" value="false"/>
  </operator>
</process>
Could you please help me solve this problem?
regards,