🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

Scoring text against a semantic dictionary

User: "robin"
New Altair Community Member
Updated by Jocelyn

Hi

 

I am attempting to run a process where I score text against a semantic dictionary (I have attached an example). I am pulling out the word count and loading the dictionary, but I am getting myself into a complete knot over how to calculate the result.

 

The process should be as follows:

  1. Load the text
  2. Select the text
  3. Load the dictionary
  4. Perform a word frequency count
  5. Multiply the word frequency by the dictionary result and divide by the total word count

Semantic dictionaries are different to sentiment dictionaries as they calculate the volume of word types being used in the text. Also, in a semantic dictionary a word can belong to multiple segments and is not only either positive or negative.

 

I have also attached the process as far as I have managed to get so far. 

Find more posts tagged with

Sort by:
1 - 1 of 11
    User: "Thomas_Ott"
    New Altair Community Member
    Accepted Answer

    Take a look at the sample process below. You'll need to modify it with your dictionary and have your pronouns, verbs, etc. all add up to one. This process was developed by my colleague, so he annotated the process as well to help you along.

     

    <?xml version="1.0" encoding="UTF-8"?>
    <!--
      RapidMiner process: score texts against a weighted word dictionary.
      Stages, in operator order:
        1. build a small dictionary table (Text, Weight),
        2. reshape it into one column per dictionary word and fit a bias-free linear regression,
        3. build two test sentences and turn them into term-occurrence vectors,
        4. apply the regression model to those vectors.
      Swap the dummy dictionary and the test sentences for your own data.
    -->
    <process version="7.4.000">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="7.4.000" expanded="true" name="Process">
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">

          <!-- Stage 1: dictionary. The subprocess appends two generated example sets and hands the merged set to "out 1". -->
          <operator activated="true" class="subprocess" compatibility="7.4.000" expanded="true" height="82" name="Subprocess" width="90" x="45" y="136">
            <process expanded="true">
              <!-- Dictionary entry: Text "good" with Weight 1. -->
              <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
                <list key="attribute_values">
                  <parameter key="Text" value="&quot;good&quot;"/>
                  <parameter key="Weight" value="1"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <!-- Dictionary entry: Text "bad" with Weight -1.5. -->
              <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="136">
                <list key="attribute_values">
                  <parameter key="Text" value="&quot;bad&quot;"/>
                  <parameter key="Weight" value="-1.5"/>
                </list>
                <list key="set_additional_roles"/>
              </operator>
              <operator activated="true" class="append" compatibility="7.4.000" expanded="true" height="103" name="Append" width="90" x="179" y="34"/>
              <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
              <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
              <connect from_op="Append" from_port="merged set" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <description align="center" color="transparent" colored="false" width="126">Create a dummy dictionary</description>
          </operator>

          <!-- Stage 2: reshape the dictionary and train. -->
          <!-- Weight is set to 1/Weight. NOTE(review): presumably so that, with the constant label of 1 below, -->
          <!-- the fitted coefficient of each word comes out as its original weight; confirm on the model output. -->
          <operator activated="true" class="generate_attributes" compatibility="7.4.000" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="179" y="136">
            <list key="function_descriptions">
              <parameter key="Weight" value="1/Weight"/>
            </list>
            <description align="center" color="transparent" colored="false" width="126">Invert all Weights for the Linear Regression</description>
          </operator>
          <!-- Generate ID supplies the "id" attribute that Pivot groups by. -->
          <operator activated="true" class="generate_id" compatibility="7.4.000" expanded="true" height="82" name="Generate ID" width="90" x="313" y="136"/>
          <!-- Pivot groups by id and indexes by Text (see the yellow annotation further down for the resulting table). -->
          <operator activated="true" class="pivot" compatibility="7.4.000" expanded="true" height="82" name="Pivot" width="90" x="447" y="136">
            <parameter key="group_attribute" value="id"/>
            <parameter key="index_attribute" value="Text"/>
            <parameter key="skip_constant_attributes" value="false"/>
          </operator>
          <!-- Strip the "Weight_" prefix from the pivoted attribute names, keeping only the captured remainder. -->
          <operator activated="true" class="rename_by_replacing" compatibility="7.4.000" expanded="true" height="82" name="Rename by Replacing" width="90" x="581" y="136">
            <parameter key="replace_what" value="Weight_(.+)"/>
            <parameter key="replace_by" value="$1"/>
          </operator>
          <!-- Missing cells are replaced with zero. -->
          <operator activated="true" class="replace_missing_values" compatibility="7.4.000" expanded="true" height="103" name="Replace Missing Values" width="90" x="715" y="136">
            <parameter key="default" value="zero"/>
            <list key="columns"/>
          </operator>
          <!-- Add an attribute named "label" with the constant value 1 and give it the label role. -->
          <operator activated="true" class="generate_attributes" compatibility="7.4.000" expanded="true" height="82" name="Generate Attributes" width="90" x="849" y="136">
            <list key="function_descriptions">
              <parameter key="label" value="1"/>
            </list>
          </operator>
          <operator activated="true" class="set_role" compatibility="7.4.000" expanded="true" height="82" name="Set Role" width="90" x="983" y="136">
            <parameter key="attribute_name" value="label"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Regression is trained with use_bias switched off. -->
          <operator activated="true" class="vector_linear_regression" compatibility="7.4.000" expanded="true" height="82" name="Vector Linear Regression" width="90" x="1117" y="136">
            <parameter key="use_bias" value="false"/>
          </operator>

          <!-- Stage 3: test data. Two sample sentences are appended, Text is converted from nominal to text, -->
          <!-- and each document is tokenized and case-transformed into a Term Occurrences vector. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="1050" y="340">
            <list key="attribute_values">
              <parameter key="Text" value="&quot;This is a good Text&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.4.000" expanded="true" height="68" name="Generate Data by User Specification (4)" width="90" x="1050" y="442">
            <list key="attribute_values">
              <parameter key="Text" value="&quot;This is a bad Text&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.4.000" expanded="true" height="103" name="Append (2)" width="90" x="1184" y="391"/>
          <operator activated="true" class="nominal_to_text" compatibility="7.4.000" expanded="true" height="82" name="Nominal to Text" width="90" x="1318" y="391">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="Text"/>
          </operator>
          <operator activated="true" class="text:process_document_from_data" compatibility="7.4.001" expanded="true" height="82" name="Process Documents from Data" width="90" x="1452" y="391">
            <parameter key="vector_creation" value="Term Occurrences"/>
            <list key="specify_weights"/>
            <process expanded="true">
              <operator activated="true" class="text:tokenize" compatibility="7.4.001" expanded="true" height="68" name="Tokenize" width="90" x="179" y="85"/>
              <operator activated="true" class="text:transform_cases" compatibility="7.4.001" expanded="true" height="68" name="Transform Cases" width="90" x="380" y="85"/>
              <connect from_port="document" to_op="Tokenize" to_port="document"/>
              <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
              <connect from_op="Transform Cases" from_port="document" to_port="document 1"/>
              <portSpacing port="source_document" spacing="0"/>
              <portSpacing port="sink_document 1" spacing="0"/>
              <portSpacing port="sink_document 2" spacing="0"/>
            </process>
          </operator>

          <!-- Stage 4: scoring. The trained model is applied to the term-occurrence vectors. -->
          <operator activated="true" class="apply_model" compatibility="7.4.000" expanded="true" height="82" name="Apply Model" width="90" x="1586" y="136">
            <list key="application_parameters"/>
          </operator>

          <!-- Wiring, training branch: Subprocess through Vector Linear Regression into the model port of Apply Model. -->
          <connect from_op="Subprocess" from_port="out 1" to_op="Generate Attributes (2)" to_port="example set input"/>
          <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_op="Pivot" to_port="example set input"/>
          <connect from_op="Pivot" from_port="example set output" to_op="Rename by Replacing" to_port="example set input"/>
          <connect from_op="Rename by Replacing" from_port="example set output" to_op="Replace Missing Values" to_port="example set input"/>
          <connect from_op="Replace Missing Values" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Vector Linear Regression" to_port="training set"/>
          <connect from_op="Vector Linear Regression" from_port="model" to_op="Apply Model" to_port="model"/>
          <!-- Wiring, test branch: sample sentences through Process Documents from Data into the unlabelled data port. -->
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append (2)" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append (2)" to_port="example set 2"/>
          <connect from_op="Append (2)" from_port="merged set" to_op="Nominal to Text" to_port="example set input"/>
          <connect from_op="Nominal to Text" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
          <connect from_op="Process Documents from Data" from_port="example set" to_op="Apply Model" to_port="unlabelled data"/>
          <!-- The labelled data is delivered on result 1. -->
          <connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>

          <!-- Canvas annotations. Their text is kept on one line each so no extra whitespace ends up in the notes. -->
          <description align="left" color="yellow" colored="false" height="248" resized="true" width="543" x="281" y="32">Built a table like&lt;br&gt;&lt;br&gt;good ................. bad&lt;br&gt;1/1 ..................... 0&lt;br&gt;0 ......................... 1/-1.5</description>
          <description align="center" color="yellow" colored="false" height="247" resized="true" width="265" x="845" y="12">Generate a constant label of 1</description>
          <description align="center" color="yellow" colored="false" height="271" resized="true" width="578" x="1017" y="287">Build and process test data</description>
          <description align="center" color="red" colored="true" height="140" resized="true" width="706" x="55" y="423">This process creates a scoring model from an annotated dictionary.&lt;br/&gt;This is a technique used e.g. for sentiment analysis where you assign a value for each word. In this case we have a dummy data set with &amp;quot;good&amp;quot; and &amp;quot;bad&amp;quot; annotated with 1 and -1.5 respectivly.</description>
        </process>
      </operator>
    </process>