Errors Twitter data, Suddenly Attribute Label Missing, Inside Cross Validation, SVM and Apply Model
Hi everyone, I'm building a process in RapidMiner for my thesis. I have little time and I'm in desperate need of help. Please, I have no one to help me with RapidMiner; everyone else I know uses R or Python.
I am getting many errors in my process:
- First, Inside the 'Cross validation' operator it does not identify my attribute as a label. For test, I've added a 'Set role' operator towards 'SVM' operator and still doesn't work. Then for training, also 'Performance' operator doesn't recognize my label anymore.
- When I go back to the process and add a 'Set role' operator again before 'Cross validation', it doesn't work either. My attribute is no longer in the list for some unknown reason, although it does show up in the earlier steps of the process.
- Lastly, to make things even worse, 'Apply model' also does not recognize my attribute (I also tried adding 'Set role' there). The label attribute "text" is just gone from the list.
I'm analyzing text from Twitter. Since I had several queries to analyze for my research, I didn't retrieve the tweets from within RapidMiner; instead, I downloaded the data through the Twitter developer app and imported it into RapidMiner. I am posting my XML process here.
Please help.
<?xml version="1.0" encoding="UTF-8"?><process version="9.0.000">
<!-- Process context: the input, output and macro sections are all empty. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<operator activated="true" class="process" compatibility="9.0.000" expanded="true" name="Process">
<process expanded="true">
<!--
  Read CSV: loads the tweet export from a local, comma-separated CSV file
  that is read as windows-1252.
  The meta data list below declares 90 columns (indices 0 to 89). Each value
  appears to follow the pattern name.selected.type.role, with literal dots in
  a column name escaped by a backslash (TODO confirm against the RapidMiner
  documentation). Every column is declared polynominal except followers_count,
  which is declared integer. The last two columns carry the generic names
  att89 and att90.
-->
<operator activated="true" class="read_csv" compatibility="9.0.000" expanded="true" height="68" name="Read CSV" width="90" x="45" y="34">
  <parameter key="csv_file" value="C:\Users\aline\OneDrive\Documentos\AlineXX.csv"/>
  <parameter key="column_separators" value=","/>
  <parameter key="skip_comments" value="true"/>
  <parameter key="date_format" value="dd/MM/yyyy HH:mm"/>
  <list key="annotations"/>
  <parameter key="encoding" value="windows-1252"/>
  <list key="data_set_meta_data_information">
    <parameter key="0" value="user_id.true.polynominal.attribute"/>
    <parameter key="1" value="status_id.true.polynominal.attribute"/>
    <parameter key="2" value="created_at.true.polynominal.attribute"/>
    <parameter key="3" value="screen_name.true.polynominal.attribute"/>
    <parameter key="4" value="text.true.polynominal.attribute"/>
    <parameter key="5" value="source.true.polynominal.attribute"/>
    <parameter key="6" value="display_text_width.true.polynominal.attribute"/>
    <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
    <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
    <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
    <parameter key="10" value="is_quote.true.polynominal.attribute"/>
    <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
    <parameter key="12" value="favorite_count.true.polynominal.attribute"/>
    <parameter key="13" value="retweet_count.true.polynominal.attribute"/>
    <parameter key="14" value="hashtags.true.polynominal.attribute"/>
    <parameter key="15" value="symbols.true.polynominal.attribute"/>
    <parameter key="16" value="urls_url.true.polynominal.attribute"/>
    <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
    <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
    <parameter key="19" value="media_url.true.polynominal.attribute"/>
    <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
    <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
    <parameter key="22" value="media_type.true.polynominal.attribute"/>
    <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
    <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
    <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
    <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
    <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
    <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
    <parameter key="29" value="lang.true.polynominal.attribute"/>
    <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
    <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
    <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
    <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
    <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
    <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
    <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
    <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
    <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
    <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
    <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
    <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
    <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
    <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
    <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
    <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
    <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
    <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
    <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
    <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
    <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
    <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
    <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
    <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
    <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
    <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
    <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
    <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
    <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
    <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
    <parameter key="60" value="place_url.true.polynominal.attribute"/>
    <parameter key="61" value="place_name.true.polynominal.attribute"/>
    <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
    <parameter key="63" value="place_type.true.polynominal.attribute"/>
    <parameter key="64" value="country.true.polynominal.attribute"/>
    <parameter key="65" value="country_code.true.polynominal.attribute"/>
    <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
    <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
    <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
    <parameter key="69" value="status_url.true.polynominal.attribute"/>
    <parameter key="70" value="name.true.polynominal.attribute"/>
    <parameter key="71" value="location.true.polynominal.attribute"/>
    <parameter key="72" value="description.true.polynominal.attribute"/>
    <parameter key="73" value="url.true.polynominal.attribute"/>
    <parameter key="74" value="protected.true.polynominal.attribute"/>
    <parameter key="75" value="followers_count.true.integer.attribute"/>
    <parameter key="76" value="friends_count.true.polynominal.attribute"/>
    <parameter key="77" value="listed_count.true.polynominal.attribute"/>
    <parameter key="78" value="statuses_count.true.polynominal.attribute"/>
    <parameter key="79" value="favourites_count.true.polynominal.attribute"/>
    <parameter key="80" value="account_created_at.true.polynominal.attribute"/>
    <parameter key="81" value="verified.true.polynominal.attribute"/>
    <parameter key="82" value="profile_url.true.polynominal.attribute"/>
    <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
    <parameter key="84" value="account_lang.true.polynominal.attribute"/>
    <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
    <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
    <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
    <parameter key="88" value="att89.true.polynominal.attribute"/>
    <parameter key="89" value="att90.true.polynominal.attribute"/>
  </list>
</operator>
<!--
  Set Role: assigns the label role to the "text" attribute.
  NOTE(review): "text" is also the only attribute weighted in the
  "Process Documents from Data" operator further down. Presumably that
  operator turns the text into word-vector columns and drops the original
  attribute, which would explain why the label is no longer available
  downstream. Confirm, and consider using a separate class column as label.
-->
<operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role" width="90" x="45" y="136">
  <parameter key="attribute_name" value="text"/>
  <parameter key="target_role" value="label"/>
  <list key="set_additional_roles"/>
</operator>
<!--
  Filter Examples: two filter entries on the "text" attribute, one requiring
  a non-missing value and one requiring that it contains "strike".
-->
<operator activated="true" class="filter_examples" compatibility="9.0.000" expanded="true" height="103" name="Filter Examples" width="90" x="179" y="34">
  <list key="filters_list">
    <parameter key="filters_entry_key" value="text.is_not_missing."/>
    <parameter key="filters_entry_key" value="text.contains.strike"/>
  </list>
</operator>
<!--
  Nominal to Text: applied to the attribute subset "text" only. Special
  attributes are included, so the conversion also reaches "text" while it
  holds the label role.
-->
<operator activated="true" class="nominal_to_text" compatibility="9.0.000" expanded="true" height="82" name="Nominal to Text" width="90" x="179" y="187">
  <parameter key="attribute_filter_type" value="subset"/>
  <parameter key="attributes" value="text"/>
  <parameter key="include_special_attributes" value="true"/>
</operator>
<!--
  Process Documents from Data: builds the word vector from the "text"
  attribute (weight 1.0). The inner subprocess tokenizes each document,
  applies Transform Cases and then removes English stopwords, all with the
  operators' default parameters.
  The class key is restored to "text:process_document_from_data"; the posted
  value "textrocess_document_from_data" had lost the characters ":p"
  (presumably swallowed by the forum's emoticon conversion) and is not a
  valid operator key.
-->
<operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="313" y="85">
  <parameter key="select_attributes_and_weights" value="true"/>
  <list key="specify_weights">
    <parameter key="text" value="1.0"/>
  </list>
  <process expanded="true">
    <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="112" y="34"/>
    <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="246" y="34"/>
    <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="380" y="34"/>
    <!-- Linear chain: document input, Tokenize, Transform Cases, Filter Stopwords, document output. -->
    <connect from_port="document" to_op="Tokenize" to_port="document"/>
    <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
    <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
    <connect from_op="Filter Stopwords (English)" from_port="document" to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
  </process>
</operator>
<!--
  Create Document: a single hand-written document of airline-complaint
  phrases. Its output is wired to "Process Documents" and from there to the
  final "Apply Model" as the data to be scored.
-->
<operator activated="true" class="text:create_document" compatibility="8.1.000" expanded="true" height="68" name="Create Document" width="90" x="45" y="289">
  <parameter key="text" value="My flight has been cancelled I'm very tired because nobody is giving information We have been waiting at the airport for hours I don't want to fly in this company ever again I have a big problem with this cancellation The service is horrible, nobody gives an explanation I'm going on a business trip I'm going on vacation Travelling with family Flight is delayed"/>
</operator>
<!--
  Process Documents: vectorises the hand-written document using the word
  list received from "Process Documents from Data". The inner subprocess
  repeats the same chain as the training side: tokenize, Transform Cases,
  remove English stopwords.
  The class key is restored to "text:process_documents"; the posted value
  "textrocess_documents" had lost the characters ":p" (presumably swallowed
  by the forum's emoticon conversion) and is not a valid operator key.
-->
<operator activated="true" class="text:process_documents" compatibility="8.1.000" expanded="true" height="103" name="Process Documents" width="90" x="313" y="289">
  <process expanded="true">
    <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="112" y="34"/>
    <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases (2)" width="90" x="246" y="34"/>
    <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (2)" width="90" x="380" y="34"/>
    <!-- Linear chain: document input, Tokenize (2), Transform Cases (2), Filter Stopwords (2), document output. -->
    <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
    <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
    <connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Stopwords (2)" to_port="document"/>
    <connect from_op="Filter Stopwords (2)" from_port="document" to_port="document 1"/>
    <portSpacing port="source_document" spacing="0"/>
    <portSpacing port="sink_document 1" spacing="0"/>
    <portSpacing port="sink_document 2" spacing="0"/>
  </process>
</operator>
<!--
  Set Role (4): assigns the label role to "user_id" on the word-vector
  example set before it enters Cross Validation.
  NOTE(review): "user_id" is read as a polynominal identifier column, so it
  looks like an unusual classification target. Verify that this is the
  intended class attribute.
-->
<operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role (4)" width="90" x="447" y="34">
  <parameter key="attribute_name" value="user_id"/>
  <parameter key="target_role" value="label"/>
  <list key="set_additional_roles"/>
</operator>
<!--
  Cross Validation: the first subprocess trains a Naive Bayes model, the
  second applies it to the test fold and measures performance.
  NOTE(review): both subprocesses set "user_id" as label again although
  "Set Role (4)" already does so before this operator; these inner Set Role
  operators look redundant, confirm before removing them.
  NOTE(review): the accompanying post talks about an SVM learner, while this
  process uses Naive Bayes.
-->
<operator activated="true" class="concurrency:cross_validation" compatibility="9.0.000" expanded="true" height="145" name="Cross Validation" width="90" x="581" y="34">
  <!-- Training subprocess: training set, Set Role (3), Naive Bayes, model output. -->
  <process expanded="true">
    <operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role (3)" width="90" x="44" y="34">
      <parameter key="attribute_name" value="user_id"/>
      <parameter key="target_role" value="label"/>
      <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="naive_bayes" compatibility="9.0.000" expanded="true" height="82" name="Naive Bayes" width="90" x="246" y="34"/>
    <connect from_port="training set" to_op="Set Role (3)" to_port="example set input"/>
    <connect from_op="Set Role (3)" from_port="example set output" to_op="Naive Bayes" to_port="training set"/>
    <connect from_op="Naive Bayes" from_port="model" to_port="model"/>
    <portSpacing port="source_training set" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
  </process>
  <!-- Testing subprocess: Apply Model (2), Set Role (2), Performance, performance output. -->
  <process expanded="true">
    <operator activated="true" class="apply_model" compatibility="9.0.000" expanded="true" height="82" name="Apply Model (2)" width="90" x="45" y="34">
      <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role (2)" width="90" x="112" y="136">
      <parameter key="attribute_name" value="user_id"/>
      <parameter key="target_role" value="label"/>
      <list key="set_additional_roles"/>
    </operator>
    <operator activated="true" class="performance" compatibility="9.0.000" expanded="true" height="82" name="Performance" width="90" x="246" y="85"/>
    <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
    <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Set Role (2)" to_port="example set input"/>
    <connect from_op="Set Role (2)" from_port="example set output" to_op="Performance" to_port="labelled data"/>
    <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="sink_test set results" spacing="0"/>
    <portSpacing port="sink_performance 1" spacing="0"/>
    <portSpacing port="sink_performance 2" spacing="0"/>
  </process>
</operator>
<!--
  Apply Model: scores the vectorised hand-written document with the model
  delivered by Cross Validation. No application parameters are set.
-->
<operator activated="true" class="apply_model" compatibility="9.0.000" expanded="true" height="82" name="Apply Model" width="90" x="514" y="289">
  <list key="application_parameters"/>
</operator>
<!-- Top-level wiring of the main process. -->
<!-- Training data path: Read CSV, Set Role, Filter Examples, Nominal to Text, Process Documents from Data, Set Role (4). -->
<connect from_op="Read CSV" from_port="output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
<connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
<connect from_op="Process Documents from Data" from_port="example set" to_op="Set Role (4)" to_port="example set input"/>
<!-- Scoring path: the training word list and the hand-written document feed Process Documents, whose example set goes to Apply Model. -->
<connect from_op="Process Documents from Data" from_port="word list" to_op="Process Documents" to_port="word list"/>
<connect from_op="Create Document" from_port="output" to_op="Process Documents" to_port="documents 1"/>
<connect from_op="Process Documents" from_port="example set" to_op="Apply Model" to_port="unlabelled data"/>
<!-- Validation: the labelled word vector enters Cross Validation, and its model is handed to Apply Model. -->
<connect from_op="Set Role (4)" from_port="example set output" to_op="Cross Validation" to_port="example set"/>
<connect from_op="Cross Validation" from_port="model" to_op="Apply Model" to_port="model"/>
<!-- Process results: result 1 is the Cross Validation example set, result 2 is the scored document. -->
<connect from_op="Cross Validation" from_port="example set" to_port="result 1"/>
<connect from_op="Apply Model" from_port="labelled data" to_port="result 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>