🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

Text Mining Words not produced

User: "asn4293"
New Altair Community Member
Updated by Jocelyn

I need to count the number of occurrences of specific words in an Excel file. For example, I have ten columns, and each row holds one employee. Each employee profile may contain a word that we want to find, together with its number of occurrences.

<?xml version="1.0" encoding="UTF-8"?>
<process version="8.1.000">
  <!--
    RapidMiner process as posted with the question.
    Flow: sheet 1 is read and turned into documents, a word vector is built
    from them, and that result is wired into "Filter Tokens Using ExampleSet"
    together with the single "Matching Text" column read from sheet 2.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Employee data: range A1:F36, row 0 annotated as the attribute-name row, six declared columns. -->
      <operator activated="true" class="read_excel" compatibility="8.1.000" expanded="true" height="68" name="Read Excel" width="90" x="45" y="187">
        <parameter key="excel_file" value="C:\Users\shahida1\Desktop\Sample Testing Text Mining.xlsx"/>
        <parameter key="imported_cell_range" value="A1:F36"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="User Sys ID.true.integer.id"/>
          <parameter key="1" value="Employee Id.true.integer.attribute"/>
          <parameter key="2" value="Last name.true.polynominal.attribute"/>
          <parameter key="3" value="First name.true.polynominal.attribute"/>
          <parameter key="4" value="Business Unit.true.polynominal.attribute"/>
          <parameter key="5" value="Functional Area.true.polynominal.attribute"/>
        </list>
      </operator>
      <!-- Converts the example set from Read Excel into documents. -->
      <operator activated="true" class="text:data_to_documents" compatibility="8.1.000" expanded="true" height="68" name="Data to Documents" width="90" x="246" y="187">
        <parameter key="select_attributes_and_weights" value="true"/>
        <list key="specify_weights"/>
      </operator>
      <!--
        Builds the word vector with "Binary Term Occurrences".
        NOTE(review): the post asks for occurrence counts; a binary vector
        presumably records only presence or absence per document. Confirm
        whether "Term Occurrences" is what is wanted here.
      -->
      <operator activated="true" class="text:process_documents" compatibility="8.1.000" expanded="true" height="103" name="Process Documents" width="90" x="447" y="187">
        <parameter key="vector_creation" value="Binary Term Occurrences"/>
        <process expanded="true">
          <!-- Per-document chain: Tokenize, then Transform Cases, then Filter Stopwords (English). -->
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="112" y="34"/>
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="179" y="238"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="313" y="238"/>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Keyword table: sheet 2, range A1:A2, one column named "Matching Text". -->
      <operator activated="true" class="read_excel" compatibility="8.1.000" expanded="true" height="68" name="Read Excel (2)" width="90" x="112" y="442">
        <parameter key="excel_file" value="C:\Users\shahida1\Desktop\Sample Testing Text Mining.xlsx"/>
        <parameter key="sheet_number" value="2"/>
        <parameter key="imported_cell_range" value="A1:A2"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Matching Text.true.polynominal.attribute"/>
        </list>
      </operator>
      <!-- Keeps only the "Matching Text" attribute of the keyword table. -->
      <operator activated="true" class="select_attributes" compatibility="8.1.000" expanded="true" height="82" name="Select Attributes" width="90" x="246" y="442">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Matching Text"/>
      </operator>
      <!--
        NOTE(review): below, the word vector from Process Documents is wired to
        this operator's "example set" port and the keyword table to its
        "document" port. That looks swapped, and the operator is presumably
        meant to receive documents rather than a word vector. Confirm against
        the Operator Toolbox documentation.
      -->
      <operator activated="true" class="operator_toolbox:filter_tokens_using_exampleset" compatibility="1.0.000" expanded="true" height="82" name="Filter Tokens Using ExampleSet" width="90" x="581" y="340"/>
      <!-- Top-level wiring. -->
      <connect from_op="Read Excel" from_port="output" to_op="Data to Documents" to_port="example set"/>
      <connect from_op="Data to Documents" from_port="documents" to_op="Process Documents" to_port="documents 1"/>
      <connect from_op="Process Documents" from_port="example set" to_op="Filter Tokens Using ExampleSet" to_port="example set"/>
      <connect from_op="Read Excel (2)" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Filter Tokens Using ExampleSet" to_port="document"/>
      <connect from_op="Filter Tokens Using ExampleSet" from_port="document" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

 
This is the file which I am working on.
https://drive.google.com/open?id=1PT38TyBmoIHDfIVCAk7Je3l3AXRP-4Gi

Find more posts tagged with

Sort by:
1 - 1 of 11
    User: "lionelderkrikor"
    New Altair Community Member
    Accepted Answer

    Hi again @asn4293,

     

    As I said in my previous post, I added the Extract Macro operator to automatically define the keyword used in the Aggregate operator.

    Here is the new release:

    <?xml version="1.0" encoding="UTF-8"?>
    <process version="8.1.003">
      <!--
        RapidMiner process from the accepted answer.
        Flow: the keyword is read from sheet 2 and stored in the macro
        "keyWord"; the keyword table is tokenized and its word list is passed
        to the main "Process Documents from Data", which builds a Term
        Occurrences vector from sheet 1; Aggregate then sums the attribute
        whose name is the macro value.
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="8.1.003" expanded="true" name="Process">
        <process expanded="true">
          <!-- Employee data: range A1:F36, row 0 annotated as the name row; "Functional Area" is declared as text. -->
          <operator activated="true" class="read_excel" compatibility="8.1.003" expanded="true" height="68" name="Read Excel" width="90" x="112" y="34">
            <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Count_Keyword\Sample Testing Text Mining.xlsx"/>
            <parameter key="imported_cell_range" value="A1:F36"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="User Sys ID.true.integer.attribute"/>
              <parameter key="1" value="Employee Id.true.integer.attribute"/>
              <parameter key="2" value="Last name.true.polynominal.attribute"/>
              <parameter key="3" value="First name.true.polynominal.attribute"/>
              <parameter key="4" value="Business Unit.true.polynominal.attribute"/>
              <parameter key="5" value="Functional Area.true.text.attribute"/>
            </list>
          </operator>
          <!-- Keyword table: sheet 2, range A1:A2, one text column named "Matching Text". -->
          <operator activated="true" class="read_excel" compatibility="8.1.003" expanded="true" height="68" name="Read Excel (2)" width="90" x="45" y="187">
            <parameter key="excel_file" value="C:\Users\Lionel\Documents\Formations_DataScience\Rapidminer\Tests_Rapidminer\Count_Keyword\Sample Testing Text Mining.xlsx"/>
            <parameter key="sheet_number" value="2"/>
            <parameter key="imported_cell_range" value="A1:A2"/>
            <parameter key="first_row_as_names" value="false"/>
            <list key="annotations">
              <parameter key="0" value="Name"/>
            </list>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="Matching Text.true.text.attribute"/>
            </list>
          </operator>
          <!-- Stores the "Matching Text" data value at example_index 1 in the macro "keyWord". -->
          <operator activated="true" class="extract_macro" compatibility="8.1.003" expanded="true" height="68" name="Extract Macro" width="90" x="179" y="187">
            <parameter key="macro" value="keyWord"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="Matching Text"/>
            <parameter key="example_index" value="1"/>
            <list key="additional_macros"/>
          </operator>
          <!--
            Tokenizes the keyword table. Its "word list" output is connected
            below to the "word list" input of the main Process Documents from
            Data; presumably this restricts the main word vector to the
            keyword tokens (confirm in the operator documentation).
          -->
          <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (2)" width="90" x="313" y="187">
            <parameter key="keep_text" value="true"/>
            <list key="specify_weights"/>
            <process expanded="true">
              <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="246" y="34"/>
              <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
              <connect from_op="Tokenize (2)" from_port="document" to_port="document 1"/>
              <portSpacing port="source_document" spacing="0"/>
              <portSpacing port="sink_document 1" spacing="0"/>
              <portSpacing port="sink_document 2" spacing="0"/>
            </process>
          </operator>
          <!--
            Main word vector over the employee data, using "Term Occurrences".
            Inner chain: Tokenize, then Filter Stopwords (English).
            NOTE(review): there is no Transform Cases step in either document
            chain, so the keyword presumably has to match the token casing
            exactly. Confirm.
          -->
          <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="380" y="34">
            <parameter key="vector_creation" value="Term Occurrences"/>
            <parameter key="keep_text" value="true"/>
            <list key="specify_weights"/>
            <process expanded="true">
              <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="246" y="34"/>
              <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="380" y="34"/>
              <connect from_port="document" to_op="Tokenize" to_port="document"/>
              <connect from_op="Tokenize" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
              <connect from_op="Filter Stopwords (English)" from_port="document" to_port="document 1"/>
              <portSpacing port="source_document" spacing="0"/>
              <portSpacing port="sink_document 1" spacing="0"/>
              <portSpacing port="sink_document 2" spacing="0"/>
            </process>
          </operator>
          <!-- Sums the attribute whose name is the value of the macro "keyWord". -->
          <operator activated="true" class="aggregate" compatibility="8.1.003" expanded="true" height="82" name="Aggregate" width="90" x="514" y="34">
            <list key="aggregation_attributes">
              <parameter key="%{keyWord}" value="sum"/>
            </list>
          </operator>
          <!-- Top-level wiring: result 1 = aggregated sum, result 2 = main word list, result 3 = processed keyword table. -->
          <connect from_op="Read Excel" from_port="output" to_op="Process Documents from Data" to_port="example set"/>
          <connect from_op="Read Excel (2)" from_port="output" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Process Documents from Data (2)" to_port="example set"/>
          <connect from_op="Process Documents from Data (2)" from_port="example set" to_port="result 3"/>
          <connect from_op="Process Documents from Data (2)" from_port="word list" to_op="Process Documents from Data" to_port="word list"/>
          <connect from_op="Process Documents from Data" from_port="example set" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Process Documents from Data" from_port="word list" to_port="result 2"/>
          <connect from_op="Aggregate" from_port="example set output" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
          <portSpacing port="sink_result 4" spacing="0"/>
        </process>
      </operator>
    </process>

    I hope it helps,

     

    Regards,

     

     

    Lionel