🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

Errors Twitter data, Suddenly Attribute Label Missing, Inside Cross Validation, SVM and Apply Model

alineboraUser: "alinebora"
New Altair Community Member
Updated by Jocelyn

Hi everyone, I'm building a process in RapidMiner for my thesis. I have little time and I desperately need help. I beg you, please :( I have no one to help me with RapidMiner; everyone else I know uses R or Python.

I am getting many errors in my process:

  1. First, inside the 'Cross Validation' operator, my attribute is not identified as a label. For testing, I've added a 'Set Role' operator feeding into the 'SVM' operator and it still doesn't work. Then for training, the 'Performance' operator also doesn't recognize my label anymore.
  2. When I go back to the main process and add a 'Set Role' operator again before 'Cross Validation', it doesn't work either. For some unknown reason my attribute is no longer in the attribute list, while in the earlier steps of the process it does show up.
  3. Lastly, to make it even worse, 'Apply Model' also does not recognize my attribute (I also tried adding 'Set Role' there). The label attribute "text" is just gone from the list.

I'm analyzing text from Twitter. Since I had several queries to analyze for my research, I didn't retrieve the tweets through RapidMiner; instead I downloaded the data via a Twitter developer app and imported it into RapidMiner. I'm posting my XML process here.

Please help.

 

<?xml version="1.0" encoding="UTF-8"?><process version="9.0.000">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="9.0.000" expanded="true" name="Process">
<process expanded="true">
<!-- Read CSV: loads the exported tweet table from a local CSV file
     (comma separated, windows-1252 encoded, date format dd/MM/yyyy HH:mm).
     The meta data list declares 90 columns (indices 0 to 89); every column is
     typed polynominal except followers_count, which is integer.
     NOTE(review): csv_file is an absolute path on the author's Windows drive and
     has to be re-pointed before the process can run on another machine.
     NOTE(review): att89 and att90 look like unnamed trailing columns of the
     export; confirm whether they carry data. -->
<operator activated="true" class="read_csv" compatibility="9.0.000" expanded="true" height="68" name="Read CSV" width="90" x="45" y="34">
  <parameter key="csv_file" value="C:\Users\aline\OneDrive\Documentos\AlineXX.csv"/>
  <parameter key="column_separators" value=","/>
  <parameter key="skip_comments" value="true"/>
  <parameter key="date_format" value="dd/MM/yyyy HH:mm"/>
  <list key="annotations"/>
  <parameter key="encoding" value="windows-1252"/>
  <list key="data_set_meta_data_information">
    <parameter key="0" value="user_id.true.polynominal.attribute"/>
    <parameter key="1" value="status_id.true.polynominal.attribute"/>
    <parameter key="2" value="created_at.true.polynominal.attribute"/>
    <parameter key="3" value="screen_name.true.polynominal.attribute"/>
    <parameter key="4" value="text.true.polynominal.attribute"/>
    <parameter key="5" value="source.true.polynominal.attribute"/>
    <parameter key="6" value="display_text_width.true.polynominal.attribute"/>
    <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
    <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
    <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
    <parameter key="10" value="is_quote.true.polynominal.attribute"/>
    <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
    <parameter key="12" value="favorite_count.true.polynominal.attribute"/>
    <parameter key="13" value="retweet_count.true.polynominal.attribute"/>
    <parameter key="14" value="hashtags.true.polynominal.attribute"/>
    <parameter key="15" value="symbols.true.polynominal.attribute"/>
    <parameter key="16" value="urls_url.true.polynominal.attribute"/>
    <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
    <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
    <parameter key="19" value="media_url.true.polynominal.attribute"/>
    <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
    <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
    <parameter key="22" value="media_type.true.polynominal.attribute"/>
    <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
    <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
    <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
    <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
    <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
    <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
    <parameter key="29" value="lang.true.polynominal.attribute"/>
    <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
    <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
    <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
    <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
    <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
    <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
    <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
    <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
    <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
    <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
    <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
    <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
    <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
    <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
    <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
    <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
    <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
    <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
    <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
    <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
    <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
    <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
    <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
    <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
    <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
    <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
    <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
    <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
    <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
    <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
    <parameter key="60" value="place_url.true.polynominal.attribute"/>
    <parameter key="61" value="place_name.true.polynominal.attribute"/>
    <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
    <parameter key="63" value="place_type.true.polynominal.attribute"/>
    <parameter key="64" value="country.true.polynominal.attribute"/>
    <parameter key="65" value="country_code.true.polynominal.attribute"/>
    <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
    <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
    <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
    <parameter key="69" value="status_url.true.polynominal.attribute"/>
    <parameter key="70" value="name.true.polynominal.attribute"/>
    <parameter key="71" value="location.true.polynominal.attribute"/>
    <parameter key="72" value="description.true.polynominal.attribute"/>
    <parameter key="73" value="url.true.polynominal.attribute"/>
    <parameter key="74" value="protected.true.polynominal.attribute"/>
    <parameter key="75" value="followers_count.true.integer.attribute"/>
    <parameter key="76" value="friends_count.true.polynominal.attribute"/>
    <parameter key="77" value="listed_count.true.polynominal.attribute"/>
    <parameter key="78" value="statuses_count.true.polynominal.attribute"/>
    <parameter key="79" value="favourites_count.true.polynominal.attribute"/>
    <parameter key="80" value="account_created_at.true.polynominal.attribute"/>
    <parameter key="81" value="verified.true.polynominal.attribute"/>
    <parameter key="82" value="profile_url.true.polynominal.attribute"/>
    <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
    <parameter key="84" value="account_lang.true.polynominal.attribute"/>
    <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
    <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
    <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
    <parameter key="88" value="att89.true.polynominal.attribute"/>
    <parameter key="89" value="att90.true.polynominal.attribute"/>
  </list>
</operator>
<!-- Set Role: gives the tweet body column `text` the role `label`.
     NOTE(review): `text` is also the column that Process Documents from Data
     is told to process (specify_weights, text = 1.0). Presumably that operator
     replaces the column with word attributes, which would explain why `text`
     can no longer be picked as a label downstream; confirm in the operator
     documentation. -->
<operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role" width="90" x="45" y="136">
  <parameter key="attribute_name" value="text"/>
  <parameter key="target_role" value="label"/>
  <list key="set_additional_roles"/>
</operator>
<!-- Filter Examples: two conditions on `text`: it must not be missing, and it
     must contain the string "strike". How the two entries are combined is not
     set here, so the operator's default applies (presumably AND; confirm). -->
<operator activated="true" class="filter_examples" compatibility="9.0.000" expanded="true" height="103" name="Filter Examples" width="90" x="179" y="34">
  <list key="filters_list">
    <parameter key="filters_entry_key" value="text.is_not_missing."/>
    <parameter key="filters_entry_key" value="text.contains.strike"/>
  </list>
</operator>
<!-- Nominal to Text: applied to the attribute subset `text` only.
     include_special_attributes is switched on; `text` was given the label role
     by the earlier Set Role operator, so it is a special attribute here. -->
<operator activated="true" class="nominal_to_text" compatibility="9.0.000" expanded="true" height="82" name="Nominal to Text" width="90" x="179" y="187">
  <parameter key="attribute_filter_type" value="subset"/>
  <parameter key="attributes" value="text"/>
  <parameter key="include_special_attributes" value="true"/>
</operator>
<!-- Process Documents from Data: processes the `text` column (weight 1.0)
     through the inner chain Tokenize, Transform Cases, Filter Stopwords
     (English). Its example set and word list ports are both consumed by the
     top-level connect elements.
     The class key must read text:process_document_from_data. The forum's
     emoticon filter had rewritten the ":p" inside the key as "Smiley Tongue",
     which is not a loadable operator key. -->
<operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="313" y="85">
  <parameter key="select_attributes_and_weights" value="true"/>
  <list key="specify_weights">
    <parameter key="text" value="1.0"/>
  </list>
  <process expanded="true">
    <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="112" y="34"/>
    <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="246" y="34"/>
    <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="380" y="34"/>
    <connect from_port="document" to_op="Tokenize" to_port="document"/>
    <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
    <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
    <connect from_op="Filter Stopwords (English)" from_port="document" to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
  </process>
</operator>
<!-- Create Document: one hand-written document holding ten sample sentences,
     separated by line feeds (&#10;). Its output feeds Process Documents. -->
<operator activated="true" class="text:create_document" compatibility="8.1.000" expanded="true" height="68" name="Create Document" width="90" x="45" y="289">
  <parameter key="text" value="My flight has been cancelled&#10;I'm very tired because nobody is giving information&#10;We have been waiting at the airport for hours&#10;I don't want to fly in this company ever again&#10;I have a big problem with this cancellation&#10;The service is horrible, nobody gives an explanation&#10;I'm going on a business trip&#10;I'm going on vacation&#10;Travelling with family&#10;Flight is delayed"/>
</operator>
<!-- Process Documents: runs the hand-written document through the same inner
     chain as the tweet data (Tokenize, Transform Cases, Filter Stopwords).
     The top-level connects give it the word list of Process Documents from
     Data and the output of Create Document.
     The class key must read text:process_documents. The forum's emoticon
     filter had rewritten the ":p" inside the key as "Smiley Tongue", which is
     not a loadable operator key. -->
<operator activated="true" class="text:process_documents" compatibility="8.1.000" expanded="true" height="103" name="Process Documents" width="90" x="313" y="289">
  <process expanded="true">
    <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="112" y="34"/>
    <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases (2)" width="90" x="246" y="34"/>
    <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (2)" width="90" x="380" y="34"/>
    <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
    <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
    <connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Stopwords (2)" to_port="document"/>
    <connect from_op="Filter Stopwords (2)" from_port="document" to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
  </process>
</operator>
<!-- Set Role (4): assigns the label role to `user_id` on the example set that
     leaves Process Documents from Data, right before Cross Validation.
     NOTE(review): user_id is declared polynominal in Read CSV and is an
     account identifier, so it is presumably a stand-in rather than a
     meaningful class label; confirm the intended target. -->
<operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role (4)" width="90" x="447" y="34">
  <parameter key="attribute_name" value="user_id"/>
  <parameter key="target_role" value="label"/>
  <list key="set_additional_roles"/>
</operator>
<!-- Cross Validation with two inner processes.
     First inner process (training side): Set Role (3) labels `user_id`, then
     Naive Bayes is trained and its model is handed to the model port.
     Second inner process (testing side): Apply Model (2) scores the test set,
     Set Role (2) labels `user_id` again, and Performance delivers its result
     to "performance 1".
     NOTE(review): Set Role (3) and Set Role (2) repeat what Set Role (4)
     already does ahead of this operator. -->
<operator activated="true" class="concurrency:cross_validation" compatibility="9.0.000" expanded="true" height="145" name="Cross Validation" width="90" x="581" y="34">
  <process expanded="true">
    <operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role (3)" width="90" x="44" y="34">
      <parameter key="attribute_name" value="user_id"/>
      <parameter key="target_role" value="label"/>
      <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="naive_bayes" compatibility="9.0.000" expanded="true" height="82" name="Naive Bayes" width="90" x="246" y="34"/>
    <connect from_port="training set" to_op="Set Role (3)" to_port="example set input"/>
    <connect from_op="Set Role (3)" from_port="example set output" to_op="Naive Bayes" to_port="training set"/>
    <connect from_op="Naive Bayes" from_port="model" to_port="model"/>
    <portSpacing port="source_training set" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
  </process>
  <process expanded="true">
    <operator activated="true" class="apply_model" compatibility="9.0.000" expanded="true" height="82" name="Apply Model (2)" width="90" x="45" y="34">
      <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role (2)" width="90" x="112" y="136">
      <parameter key="attribute_name" value="user_id"/>
      <parameter key="target_role" value="label"/>
      <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="performance" compatibility="9.0.000" expanded="true" height="82" name="Performance" width="90" x="246" y="85"/>
    <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
    <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Set Role (2)" to_port="example set input"/>
    <connect from_op="Set Role (2)" from_port="example set output" to_op="Performance" to_port="labelled data"/>
    <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="sink_test set results" spacing="0"/>
    <portSpacing port="sink_performance 1" spacing="0"/>
    <portSpacing port="sink_performance 2" spacing="0"/>
  </process>
</operator>
<!-- Apply Model: top-level scoring step. The connect elements feed it the
     model from Cross Validation and the example set from Process Documents;
     its labelled data goes to result 2. -->
<operator activated="true" class="apply_model" compatibility="9.0.000" expanded="true" height="82" name="Apply Model" width="90" x="514" y="289">
  <list key="application_parameters"/>
</operator>
<!-- Top-level wiring.
     Main chain: Read CSV, Set Role, Filter Examples, Nominal to Text,
     Process Documents from Data.
     From there the example set goes through Set Role (4) into Cross
     Validation, while the word list goes to Process Documents together with
     the output of Create Document.
     Apply Model receives the Cross Validation model and the Process Documents
     example set.
     Results: result 1 = Cross Validation example set, result 2 = Apply Model
     labelled data. No connect element routes a Cross Validation performance
     port to a result port. -->
<connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
<connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
<connect from_op="Process Documents from Data" from_port="example set" to_op="Set Role (4)" to_port="example set input"/>
<connect from_op="Process Documents from Data" from_port="word list" to_op="Process Documents" to_port="word list"/>
<connect from_op="Create Document" from_port="output" to_op="Process Documents" to_port="documents 1"/>
<connect from_op="Process Documents" from_port="example set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Set Role (4)" from_port="example set output" to_op="Cross Validation" to_port="example set"/>
<connect from_op="Cross Validation" from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_op="Cross Validation" from_port="example set" to_port="result 1"/>
<connect from_op="Apply Model" from_port="labelled data" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>

 

 

 

 

Sort by:
1 - 1 of 11
    Thomas_OttUser: "Thomas_Ott"
    New Altair Community Member
    Accepted Answer

    @alinebora I made some tweaks. It appears that the Twitter operators you added aren't playing nicely for some reason. My guess is that it has to do with an encoding issue. I also disabled the Store operators: they all point to repository paths on my laptop, so they'd break for you. I just tested this and it works on my end.

     

    <?xml version="1.0" encoding="UTF-8"?><process version="8.2.001">
    <context>
    <input/>
    <output/>
    <macros/>
    </context>
    <operator activated="true" class="process" compatibility="8.2.001" expanded="true" name="Process">
    <process expanded="true">
    <!-- Retrieve Twitter Data: subprocess that collects tweets and merges them.
         Five active Search Twitter operators (queries ryanair, easyjet,
         airfrance, alitalia, klm; language en) feed Append, and Remove
         Duplicate IDs then de-duplicates on the attribute "Id" before the
         result leaves through "out 1".
         Set Macros defines keyword1 to keyword3 and retweetcount. The active
         searches use literal query strings, not the keyword macros.
         Deactivated and unconnected: the Retrieve operator, the two extra
         searches (alitalia, klm with limit 1000) and the Store operator. -->
    <operator activated="true" class="subprocess" compatibility="8.2.001" expanded="true" height="82" name="Retrieve Twitter Data" width="90" x="45" y="34">
      <process expanded="true">
        <operator activated="true" class="set_macros" compatibility="8.2.001" expanded="true" height="68" name="Set Macros" width="90" x="45" y="34">
          <list key="macros">
            <parameter key="keyword1" value="#flight"/>
            <parameter key="keyword2" value="#airlines"/>
            <parameter key="keyword3" value="#airport"/>
            <parameter key="retweetcount" value="5"/>
          </list>
          <description align="center" color="transparent" colored="false" width="126">Set global variables here. Such as keyword search.</description>
        </operator>
        <operator activated="false" class="retrieve" compatibility="8.2.001" expanded="true" height="68" name="Retrieve Twitter Content Ideas" width="90" x="45" y="340">
          <parameter key="repository_entry" value="../data/%{keyword1} Twitter Content Ideas"/>
        </operator>
        <operator activated="true" class="social_media:search_twitter" compatibility="8.0.010" expanded="true" height="68" name="Search Twitter for Keyword3" width="90" x="179" y="238">
          <parameter key="connection" value="NewConnection"/>
          <parameter key="query" value="airfrance"/>
          <parameter key="language" value="en"/>
        </operator>
        <operator activated="true" class="social_media:search_twitter" compatibility="8.0.010" expanded="true" height="68" name="Search Twitter for Keyword2" width="90" x="179" y="136">
          <parameter key="connection" value="NewConnection"/>
          <parameter key="query" value="easyjet"/>
          <parameter key="language" value="en"/>
        </operator>
        <operator activated="true" class="social_media:search_twitter" compatibility="8.0.010" expanded="true" height="68" name="Search Twitter for Keyword 1" width="90" x="179" y="34">
          <parameter key="connection" value="NewConnection"/>
          <parameter key="query" value="ryanair"/>
          <parameter key="language" value="en"/>
        </operator>
        <operator activated="false" class="social_media:search_twitter" compatibility="8.1.000" expanded="true" height="68" name="Search Twitter for Keyword 4" width="90" x="45" y="442">
          <parameter key="connection" value="NewConnection"/>
          <parameter key="query" value="alitalia"/>
          <parameter key="limit" value="1000"/>
          <parameter key="language" value="en"/>
        </operator>
        <operator activated="false" class="social_media:search_twitter" compatibility="8.1.000" expanded="true" height="68" name="Search Twitter" width="90" x="45" y="493">
          <parameter key="connection" value="NewConnection"/>
          <parameter key="query" value="klm"/>
          <parameter key="limit" value="1000"/>
          <parameter key="language" value="en"/>
        </operator>
        <operator activated="false" class="store" compatibility="8.2.001" expanded="true" height="68" name="Store Data for later reuse" width="90" x="715" y="34">
          <parameter key="repository_entry" value="//Local Repository/processes/Thom1"/>
        </operator>
        <operator activated="true" class="social_media:search_twitter" compatibility="8.0.010" expanded="true" height="68" name="Search Twitter for Keyword3 (2)" width="90" x="179" y="340">
          <parameter key="connection" value="NewConnection"/>
          <parameter key="query" value="alitalia"/>
          <parameter key="language" value="en"/>
        </operator>
        <operator activated="true" class="social_media:search_twitter" compatibility="8.0.010" expanded="true" height="68" name="Search Twitter for Keyword3 (3)" width="90" x="179" y="442">
          <parameter key="connection" value="NewConnection"/>
          <parameter key="query" value="klm"/>
          <parameter key="language" value="en"/>
        </operator>
        <operator activated="true" class="append" compatibility="8.2.001" expanded="true" height="166" name="Append Data Set together" width="90" x="447" y="34"/>
        <operator activated="true" class="remove_duplicates" compatibility="8.2.001" expanded="true" height="103" name="Remove Duplicate IDs" width="90" x="581" y="34">
          <parameter key="attribute_filter_type" value="single"/>
          <parameter key="attribute" value="Id"/>
          <parameter key="include_special_attributes" value="true"/>
        </operator>
        <connect from_op="Search Twitter for Keyword3" from_port="output" to_op="Append Data Set together" to_port="example set 3"/>
        <connect from_op="Search Twitter for Keyword2" from_port="output" to_op="Append Data Set together" to_port="example set 2"/>
        <connect from_op="Search Twitter for Keyword 1" from_port="output" to_op="Append Data Set together" to_port="example set 1"/>
        <connect from_op="Search Twitter for Keyword3 (2)" from_port="output" to_op="Append Data Set together" to_port="example set 4"/>
        <connect from_op="Search Twitter for Keyword3 (3)" from_port="output" to_op="Append Data Set together" to_port="example set 5"/>
        <connect from_op="Append Data Set together" from_port="merged set" to_op="Remove Duplicate IDs" to_port="example set input"/>
        <connect from_op="Remove Duplicate IDs" from_port="example set output" to_port="out 1"/>
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
      </process>
      <description align="center" color="transparent" colored="false" width="126">Retrieves Twitter Data, Appends, and Stores</description>
    </operator>
    <!-- ETL Subprocess: prepares the tweets for text mining.
         1. Remove Duplicates on "From-User".
         2. Generate Arbitrary Label: the new attribute `label` is
            "Not Important" when Retweet-Count is below the retweetcount macro
            and "Important" otherwise.
         3. Set Role makes `label` the label.
         4. Select Attributes keeps only Text and label (special attributes
            included).
         5. Nominal to Text converts the single attribute Text.
         6. Extract Macro (3) stores a count statistic for `label` (value
            "Important") in the macro label_count.
         The Filter Examples operator (inverted "Text contains RT") is
         deactivated and has no connect element. -->
    <operator activated="true" class="subprocess" compatibility="8.2.001" expanded="true" height="82" name="ETL Subprocess" width="90" x="179" y="34">
      <process expanded="true">
        <operator activated="true" class="remove_duplicates" compatibility="8.2.001" expanded="true" height="103" name="Remove Duplicates" width="90" x="45" y="34">
          <parameter key="attribute_filter_type" value="single"/>
          <parameter key="attribute" value="From-User"/>
          <description align="center" color="transparent" colored="false" width="126">Remove Duplicate Tweets from same user</description>
        </operator>
        <operator activated="true" class="generate_attributes" compatibility="8.2.001" expanded="true" height="82" name="Generate Arbitrary Label" width="90" x="179" y="34">
          <list key="function_descriptions">
            <parameter key="label" value="if([Retweet-Count]&lt;eval(%{retweetcount}),&quot;Not Important&quot;,&quot;Important&quot;)"/>
          </list>
        </operator>
        <operator activated="false" class="filter_examples" compatibility="8.2.001" expanded="true" height="103" name="Filter Examples" width="90" x="313" y="34">
          <parameter key="invert_filter" value="true"/>
          <list key="filters_list">
            <parameter key="filters_entry_key" value="Text.contains.RT"/>
          </list>
        </operator>
        <operator activated="true" class="set_role" compatibility="8.2.001" expanded="true" height="82" name="Set Role" width="90" x="447" y="34">
          <parameter key="attribute_name" value="label"/>
          <parameter key="target_role" value="label"/>
          <list key="set_additional_roles"/>
          <description align="center" color="transparent" colored="false" width="126">Set Role for Label</description>
        </operator>
        <operator activated="true" class="select_attributes" compatibility="8.2.001" expanded="true" height="82" name="Select Attributes" width="90" x="581" y="34">
          <parameter key="attribute_filter_type" value="subset"/>
          <parameter key="attributes" value="Text|label"/>
          <parameter key="include_special_attributes" value="true"/>
        </operator>
        <operator activated="true" class="nominal_to_text" compatibility="8.2.001" expanded="true" height="82" name="Nominal to Text" width="90" x="715" y="34">
          <parameter key="attribute_filter_type" value="single"/>
          <parameter key="attribute" value="Text"/>
        </operator>
        <operator activated="true" class="extract_macro" compatibility="8.2.001" expanded="true" height="68" name="Extract Macro (3)" width="90" x="849" y="34">
          <parameter key="macro" value="label_count"/>
          <parameter key="macro_type" value="statistics"/>
          <parameter key="statistics" value="count"/>
          <parameter key="attribute_name" value="label"/>
          <parameter key="attribute_value" value="Important"/>
          <list key="additional_macros"/>
        </operator>
        <connect from_port="in 1" to_op="Remove Duplicates" to_port="example set input"/>
        <connect from_op="Remove Duplicates" from_port="example set output" to_op="Generate Arbitrary Label" to_port="example set input"/>
        <connect from_op="Generate Arbitrary Label" from_port="example set output" to_op="Set Role" to_port="example set input"/>
        <connect from_op="Set Role" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
        <connect from_op="Select Attributes" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
        <connect from_op="Nominal to Text" from_port="example set output" to_op="Extract Macro (3)" to_port="example set"/>
        <connect from_op="Extract Macro (3)" from_port="example set" to_port="out 1"/>
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
      </process>
      <description align="center" color="transparent" colored="false" width="126">Binning for Label subprocess - suspect</description>
    </operator>
    <!-- Process Documents from Data: percentual pruning (below 5.0 percent,
         above 50.0 percent; the absolute thresholds 100 and 500 are also set).
         No per-attribute weights are specified.
         Inner chain, in order:
           Extract Links for later use (regular expression "http.*", stored
             under the query name "Tweet Links"),
           Replace http links (replaces "http.*" with "link"),
           Tokenize (on the specified characters),
           Transform Cases,
           Filter Tokens (by Length),
           Generate n-Grams (Terms),
           Filter Tokens (by Content) with string "link" and inverted condition,
           Filter Stopwords (English). -->
    <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="313" y="34">
      <parameter key="prune_method" value="percentual"/>
      <parameter key="prune_below_percent" value="5.0"/>
      <parameter key="prune_above_percent" value="50.0"/>
      <parameter key="prune_below_absolute" value="100"/>
      <parameter key="prune_above_absolute" value="500"/>
      <list key="specify_weights"/>
      <process expanded="true">
        <operator activated="true" class="text:extract_information" compatibility="8.1.000" expanded="true" height="68" name="Extract Links for later use" width="90" x="45" y="34">
          <parameter key="query_type" value="Regular Expression"/>
          <list key="string_machting_queries"/>
          <list key="regular_expression_queries">
            <parameter key="Tweet Links" value="http.*"/>
          </list>
          <list key="regular_region_queries"/>
          <list key="xpath_queries"/>
          <list key="namespaces"/>
          <list key="index_queries"/>
          <list key="jsonpath_queries"/>
        </operator>
        <operator activated="true" class="text:replace_tokens" compatibility="8.1.000" expanded="true" height="68" name="Replace http links" width="90" x="179" y="34">
          <list key="replace_dictionary">
            <parameter key="http.*" value="link"/>
          </list>
        </operator>
        <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="313" y="34">
          <parameter key="mode" value="specify characters"/>
          <parameter key="characters" value=" .!;:[,' ?]"/>
        </operator>
        <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="447" y="34"/>
        <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="581" y="34"/>
        <operator activated="true" class="text:generate_n_grams_terms" compatibility="8.1.000" expanded="true" height="68" name="Generate n-Grams (Terms)" width="90" x="715" y="34"/>
        <operator activated="true" class="text:filter_tokens_by_content" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (by Content)" width="90" x="849" y="34">
          <parameter key="string" value="link"/>
          <parameter key="invert condition" value="true"/>
        </operator>
        <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="983" y="34"/>
        <connect from_port="document" to_op="Extract Links for later use" to_port="document"/>
        <connect from_op="Extract Links for later use" from_port="document" to_op="Replace http links" to_port="document"/>
        <connect from_op="Replace http links" from_port="document" to_op="Tokenize" to_port="document"/>
        <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
        <connect from_op="Transform Cases" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
        <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
        <connect from_op="Generate n-Grams (Terms)" from_port="document" to_op="Filter Tokens (by Content)" to_port="document"/>
        <connect from_op="Filter Tokens (by Content)" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
        <connect from_op="Filter Stopwords (English)" from_port="document" to_port="document 1"/>
        <portSpacing port="source_document" spacing="0"/>
        <portSpacing port="sink_document 1" spacing="0"/>
        <portSpacing port="sink_document 2" spacing="0"/>
      </process>
    </operator>
    <!-- Multiply: sits on the canvas between Process Documents from Data
         (x=313) and Clustering Stuff (x=581). Its connect elements are outside
         this excerpt; presumably it fans the example set out to several
         consumers (confirm). -->
    <operator activated="true" class="multiply" compatibility="8.2.001" expanded="true"
              height="103" name="Multiply" width="90" x="447" y="34"/>
    <!-- Clustering Stuff: subprocess that clusters the incoming example set.
         Remove Tweet Links drops the single attribute "Tweet Links" (inverted
         selection), X-Means clusters with Bregman divergences / squared
         Euclidean distance, and Extract Cluster Prototypes turns the cluster
         model into an example set.
         Outputs: out 1 = prototype example set, out 2 = cluster model.
         Store Cluster Model is deactivated and has no connect element. -->
    <operator activated="true" class="subprocess" compatibility="8.2.001" expanded="true" height="103" name="Clustering Stuff" width="90" x="581" y="34">
      <process expanded="true">
        <operator activated="true" class="select_attributes" compatibility="8.2.001" expanded="true" height="82" name="Remove Tweet Links" width="90" x="45" y="34">
          <parameter key="attribute_filter_type" value="single"/>
          <parameter key="attribute" value="Tweet Links"/>
          <parameter key="attributes" value="Tweet Links"/>
          <parameter key="invert_selection" value="true"/>
        </operator>
        <operator activated="true" class="x_means" compatibility="7.5.003" expanded="true" height="82" name="X-Means" width="90" x="179" y="34">
          <parameter key="measure_types" value="BregmanDivergences"/>
          <parameter key="divergence" value="SquaredEuclideanDistance"/>
        </operator>
        <operator activated="true" class="extract_prototypes" compatibility="8.2.001" expanded="true" height="82" name="Extract Cluster Prototypes" width="90" x="313" y="136"/>
        <operator activated="false" class="store" compatibility="8.2.001" expanded="true" height="68" name="Store Cluster Model" width="90" x="447" y="34">
          <parameter key="repository_entry" value="../results/%{keyword1} Twitter Content Cluster Model"/>
        </operator>
        <connect from_port="in 1" to_op="Remove Tweet Links" to_port="example set input"/>
        <connect from_op="Remove Tweet Links" from_port="example set output" to_op="X-Means" to_port="example set"/>
        <connect from_op="X-Means" from_port="cluster model" to_op="Extract Cluster Prototypes" to_port="model"/>
        <connect from_op="Extract Cluster Prototypes" from_port="example set" to_port="out 1"/>
        <connect from_op="Extract Cluster Prototypes" from_port="model" to_port="out 2"/>
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
        <portSpacing port="sink_out 3" spacing="0"/>
      </process>
    </operator>
    <!-- Deactivated Store operator; no connect element of this process references it.
         The repository entry uses the %{keyword1} macro, which is not defined in the visible part of the file. -->
    <operator activated="false" class="store" compatibility="8.2.001" expanded="true"
              height="68" name="Store WordList" width="90" x="447" y="289">
      <parameter key="repository_entry" value="../results/%{keyword1} Twitter Content Ideas Wordlist"/>
    </operator>
    <!-- Per the connect elements of this process, this operator takes the "word list" output of
         "Process Documents from Data" and passes its example set on to "Sort". -->
    <operator activated="true" class="text:wordlist_to_data" compatibility="8.1.000" expanded="true"
              height="82" name="WordList to Data" width="90" x="581" y="289"/>
    <!-- Sorts by the attribute "total" in decreasing order. -->
    <operator activated="true" class="sort" compatibility="8.2.001" expanded="true"
              height="82" name="Sort" width="90" x="715" y="289">
      <parameter key="attribute_name" value="total"/>
      <parameter key="sorting_direction" value="decreasing"/>
    </operator>
    <!-- Single-attribute filter on "Tweet Links" with invert_selection="true";
         same parameter set as "Remove Tweet Links" inside "Clustering Stuff". -->
    <operator activated="true" class="select_attributes" compatibility="8.2.001" expanded="true"
              height="82" name="Remove Tweet Links (2)" width="90" x="581" y="187">
      <parameter key="attribute_filter_type" value="single"/>
      <parameter key="attribute" value="Tweet Links"/>
      <parameter key="attributes" value="Tweet Links"/>
      <parameter key="invert_selection" value="true"/>
    </operator>
    <!-- Subprocess "Determine Influence Factors": runs four attribute-weighting operators
         (correlation, Gini index, information gain, information gain ratio), converts each
         weight vector to data, tags it with a "Method" value, appends the four tables,
         pivots them by "Attribute" / "Method", averages the numeric columns into "Importance",
         normalizes and sorts that column in decreasing order, and outputs example range 1 to 20. -->
    <operator activated="true" class="subprocess" compatibility="8.2.001" expanded="true"
              height="82" name="Determine Influence Factors" width="90" x="715" y="187">
      <process expanded="true">
        <!-- Correlation branch. -->
        <operator activated="true" class="weight_by_correlation" compatibility="8.2.001" expanded="true"
                  height="82" name="Weight by Correlation" width="90" x="45" y="34"/>
        <operator activated="true" class="weights_to_data" compatibility="8.2.001" expanded="true"
                  height="68" name="Weights to Data" width="90" x="179" y="34"/>
        <!-- Generates the attribute "Method" from the string expression "Correlation". -->
        <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true"
                  height="82" name="Generate Attributes (2)" width="90" x="313" y="34">
          <list key="function_descriptions">
            <parameter key="Method" value="&quot;Correlation&quot;"/>
          </list>
        </operator>

        <!-- Remaining weighting operators; each receives the example set output of the previous one. -->
        <operator activated="true" class="weight_by_gini_index" compatibility="8.2.001" expanded="true"
                  height="82" name="Weight by Gini Index" width="90" x="45" y="120"/>
        <operator activated="true" class="weight_by_information_gain" compatibility="8.2.001" expanded="true"
                  height="82" name="Weight by Information Gain" width="90" x="45" y="210"/>
        <operator activated="true" class="weight_by_information_gain_ratio" compatibility="8.2.001" expanded="true"
                  height="82" name="Weight by Information Gain Ratio" width="90" x="45" y="300"/>

        <!-- Gini branch: Method = "Gini". -->
        <operator activated="true" class="weights_to_data" compatibility="8.2.001" expanded="true"
                  height="68" name="Weights to Data (2)" width="90" x="179" y="120"/>
        <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true"
                  height="82" name="Generate Attributes (3)" width="90" x="313" y="120">
          <list key="function_descriptions">
            <parameter key="Method" value="&quot;Gini&quot;"/>
          </list>
        </operator>

        <!-- Information gain branch: Method = "InfoGain". -->
        <operator activated="true" class="weights_to_data" compatibility="8.2.001" expanded="true"
                  height="68" name="Weights to Data (3)" width="90" x="179" y="210"/>
        <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true"
                  height="82" name="Generate Attributes (4)" width="90" x="313" y="210">
          <list key="function_descriptions">
            <parameter key="Method" value="&quot;InfoGain&quot;"/>
          </list>
        </operator>

        <!-- Information gain ratio branch: Method = "InfoGainRatio". -->
        <operator activated="true" class="weights_to_data" compatibility="8.2.001" expanded="true"
                  height="68" name="Weights to Data (4)" width="90" x="179" y="300"/>
        <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true"
                  height="82" name="Generate Attributes (5)" width="90" x="313" y="300">
          <list key="function_descriptions">
            <parameter key="Method" value="&quot;InfoGainRatio&quot;"/>
          </list>
        </operator>

        <!-- Combine the four tagged weight tables and reshape them. -->
        <operator activated="true" class="append" compatibility="8.2.001" expanded="true"
                  height="145" name="Append" width="90" x="447" y="30"/>
        <operator activated="true" class="pivot" compatibility="8.2.001" expanded="true"
                  height="82" name="Pivot" width="90" x="581" y="30">
          <parameter key="group_attribute" value="Attribute"/>
          <parameter key="index_attribute" value="Method"/>
        </operator>
        <!-- Average over attributes of value type "numeric", stored as "Importance". -->
        <operator activated="true" class="generate_aggregation" compatibility="6.5.002" expanded="true"
                  height="82" name="Generate Aggregation" width="90" x="715" y="30">
          <parameter key="attribute_name" value="Importance"/>
          <parameter key="attribute_filter_type" value="value_type"/>
          <parameter key="value_type" value="numeric"/>
          <parameter key="aggregation_function" value="average"/>
        </operator>
        <!-- Range transformation applied to the single attribute "Importance". -->
        <operator activated="true" class="normalize" compatibility="7.5.003" expanded="true"
                  height="103" name="Normalize" width="90" x="849" y="30">
          <parameter key="attribute_filter_type" value="single"/>
          <parameter key="attribute" value="Importance"/>
          <parameter key="method" value="range transformation"/>
        </operator>
        <operator activated="true" class="sort" compatibility="8.2.001" expanded="true"
                  height="82" name="Sort again" width="90" x="983" y="34">
          <parameter key="attribute_name" value="Importance"/>
          <parameter key="sorting_direction" value="decreasing"/>
        </operator>
        <!-- Ordering "Attribute|Importance" with handle_unmatched="remove". -->
        <operator activated="true" class="order_attributes" compatibility="8.2.001" expanded="true"
                  height="82" name="Reorder Attributes" width="90" x="1117" y="34">
          <parameter key="attribute_ordering" value="Attribute|Importance"/>
          <parameter key="handle_unmatched" value="remove"/>
        </operator>
        <!-- Example range 1 to 20 of the sorted table. -->
        <operator activated="true" class="filter_example_range" compatibility="8.2.001" expanded="true"
                  height="82" name="Select Top 20" width="90" x="1251" y="34">
          <parameter key="first_example" value="1"/>
          <parameter key="last_example" value="20"/>
        </operator>

        <!-- Wiring of the weighting chain and the four branches into Append. -->
        <connect from_port="in 1" to_op="Weight by Correlation" to_port="example set"/>
        <connect from_op="Weight by Correlation" from_port="weights" to_op="Weights to Data" to_port="attribute weights"/>
        <connect from_op="Weight by Correlation" from_port="example set" to_op="Weight by Gini Index" to_port="example set"/>
        <connect from_op="Weights to Data" from_port="example set" to_op="Generate Attributes (2)" to_port="example set input"/>
        <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Append" to_port="example set 1"/>
        <connect from_op="Weight by Gini Index" from_port="weights" to_op="Weights to Data (2)" to_port="attribute weights"/>
        <connect from_op="Weight by Gini Index" from_port="example set" to_op="Weight by Information Gain" to_port="example set"/>
        <connect from_op="Weight by Information Gain" from_port="weights" to_op="Weights to Data (3)" to_port="attribute weights"/>
        <connect from_op="Weight by Information Gain" from_port="example set" to_op="Weight by Information Gain Ratio" to_port="example set"/>
        <connect from_op="Weight by Information Gain Ratio" from_port="weights" to_op="Weights to Data (4)" to_port="attribute weights"/>
        <connect from_op="Weights to Data (2)" from_port="example set" to_op="Generate Attributes (3)" to_port="example set input"/>
        <connect from_op="Generate Attributes (3)" from_port="example set output" to_op="Append" to_port="example set 2"/>
        <connect from_op="Weights to Data (3)" from_port="example set" to_op="Generate Attributes (4)" to_port="example set input"/>
        <connect from_op="Generate Attributes (4)" from_port="example set output" to_op="Append" to_port="example set 3"/>
        <connect from_op="Weights to Data (4)" from_port="example set" to_op="Generate Attributes (5)" to_port="example set input"/>
        <connect from_op="Generate Attributes (5)" from_port="example set output" to_op="Append" to_port="example set 4"/>

        <!-- Linear post-processing chain from Append to the subprocess output "out 1". -->
        <connect from_op="Append" from_port="merged set" to_op="Pivot" to_port="example set input"/>
        <connect from_op="Pivot" from_port="example set output" to_op="Generate Aggregation" to_port="example set input"/>
        <connect from_op="Generate Aggregation" from_port="example set output" to_op="Normalize" to_port="example set input"/>
        <connect from_op="Normalize" from_port="example set output" to_op="Sort again" to_port="example set input"/>
        <connect from_op="Sort again" from_port="example set output" to_op="Reorder Attributes" to_port="example set input"/>
        <connect from_op="Reorder Attributes" from_port="example set output" to_op="Select Top 20" to_port="example set input"/>
        <connect from_op="Select Top 20" from_port="example set output" to_port="out 1"/>

        <!-- GUI port spacing only. -->
        <portSpacing port="source_in 1" spacing="0"/>
        <portSpacing port="source_in 2" spacing="0"/>
        <portSpacing port="sink_out 1" spacing="0"/>
        <portSpacing port="sink_out 2" spacing="0"/>
      </process>
    </operator>
    <!-- Deactivated Store operator; no connect element of this process references it.
         The repository entry uses the %{keyword1} macro, which is not defined in the visible part of the file. -->
    <operator activated="false" class="store" compatibility="8.2.001" expanded="true"
              height="68" name="Store Influence Wrds" width="90" x="849" y="187">
      <parameter key="repository_entry" value="../results/%{keyword1} Twitter Content Influence Words"/>
    </operator>
    <!-- Deactivated Excel writer; no connect element of this process references it.
         NOTE(review): "excel_file" is an absolute path under one specific Windows user profile
         and also depends on the %{keyword1} macro; adjust both before activating this operator. -->
    <operator activated="false" class="write_excel" compatibility="8.2.001" expanded="true"
              height="82" name="Write Important Words" width="90" x="983" y="187">
      <parameter key="excel_file" value="C:\Users\Thomas Ott\Dropbox\Twitter Influencers\%{keyword1} Todays Powerful Words to use in your Tweets.xlsx"/>
    </operator>
    <!-- Top-level wiring of this process.
         Main path: Retrieve Twitter Data => ETL Subprocess => Process Documents from Data => Multiply.
         "result 1" and "result 2" come from Clustering Stuff ("out 1" / "out 2"),
         "result 3" from Determine Influence Factors (fed by Multiply "output 2" via Remove Tweet Links (2)),
         and "result 4" from Sort (fed by the "word list" output via WordList to Data). -->
    <connect from_op="Retrieve Twitter Data" from_port="out 1" to_op="ETL Subprocess" to_port="in 1"/>
    <connect from_op="ETL Subprocess" from_port="out 1" to_op="Process Documents from Data" to_port="example set"/>
    <connect from_op="Process Documents from Data" from_port="example set" to_op="Multiply" to_port="input"/>
    <connect from_op="Process Documents from Data" from_port="word list" to_op="WordList to Data" to_port="word list"/>
    <connect from_op="Multiply" from_port="output 1" to_op="Clustering Stuff" to_port="in 1"/>
    <connect from_op="Multiply" from_port="output 2" to_op="Remove Tweet Links (2)" to_port="example set input"/>
    <connect from_op="Clustering Stuff" from_port="out 1" to_port="result 1"/>
    <connect from_op="Clustering Stuff" from_port="out 2" to_port="result 2"/>
    <connect from_op="WordList to Data" from_port="example set" to_op="Sort" to_port="example set input"/>
    <connect from_op="Sort" from_port="example set output" to_port="result 4"/>
    <connect from_op="Remove Tweet Links (2)" from_port="example set output" to_op="Determine Influence Factors" to_port="in 1"/>
    <connect from_op="Determine Influence Factors" from_port="out 1" to_port="result 3"/>
    <!-- Port spacing values for the process source/sink ports (GUI layout). -->
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="63"/>
    <portSpacing port="sink_result 3" spacing="126"/>
    <portSpacing port="sink_result 4" spacing="84"/>
    <portSpacing port="sink_result 5" spacing="0"/>
    </process>
    </operator>
    </process>