🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

Classifiying text data using SVM

User: "preethy"
New Altair Community Member
Updated by Jocelyn
Hello,

I have a process chain which takes in documents from files and does some preprocessing on them. This is given as input to the validator which has a modelling operator. I am not able to use SVM here. Could you please tell me how can I use SVM here?
<?xml version="1.0" encoding="UTF-8" standalone="no"?> I am attaching my process XML here.


<process version="5.3.013">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.013" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="text:process_document_from_file" compatibility="5.3.002" expanded="true" height="76" name="Process Documents from Files (2)" width="90" x="45" y="30">
        <list key="text_directories">
          <parameter key="bus_stop" value="C:\Users\w44ylqrl\Citizen\Data\bmtc\categories\bus_stop"/>
          <parameter key="crew_misbehaviour" value="C:\Users\w44ylqrl\Citizen\Data\bmtc\categories\crew_misbehaviour"/>
          <parameter key="frequency" value="C:\Users\w44ylqrl\Citizen\Data\bmtc\categories\frequency"/>
          <parameter key="shelter" value="C:\Users\w44ylqrl\Citizen\Data\bmtc\categories\shelter"/>
        </list>
        <parameter key="datamanagement" value="double_array"/>
        <process expanded="true">
          <operator activated="true" class="text:filter_tokens_by_pos" compatibility="5.3.002" expanded="true" height="60" name="Filter Tokens (2)" width="90" x="45" y="30">
            <parameter key="expression" value="NNP.*"/>
            <parameter key="invert_filter" value="true"/>
          </operator>
          <operator activated="true" class="text:replace_tokens" compatibility="5.3.002" expanded="true" height="60" name="Replace Tokens (2)" width="90" x="180" y="30">
            <list key="replace_dictionary">
              <parameter key="(https|http|ftp|file)://[-a-zA-Z0-9+&amp;@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&amp;@#/%=~_|]" value="="/>
              <parameter key="&amp;amp;" value="="/>
              <parameter key="@[-a-zA-Z0-9+&amp;amp;@#/%?=~_|!:,.;]*" value="="/>
              <parameter key="[^A-Za-z]" value="="/>
              <parameter key="\s[\s]+" value="="/>
            </list>
          </operator>
          <operator activated="true" class="text:tokenize" compatibility="5.3.002" expanded="true" height="60" name="Tokenize (2)" width="90" x="315" y="30">
            <parameter key="characters" value=".  &quot; ! ? -"/>
          </operator>
          <operator activated="true" class="text:transform_cases" compatibility="5.3.002" expanded="true" height="60" name="Transform Cases (2)" width="90" x="450" y="30"/>
          <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.3.002" expanded="true" height="76" name="Filter Stopwords (Dictionary)" width="90" x="45" y="120">
            <parameter key="file" value="C:\Users\w44ylqrl\CitiZen_Java\org.xerox.citizen.categorizzation\data\stopWordList2.csv"/>
          </operator>
          <operator activated="true" class="text:remove_document_parts" compatibility="5.3.002" expanded="true" height="60" name="Remove Document Parts" width="90" x="180" y="120">
            <parameter key="deletion_regex" value="="/>
          </operator>
          <operator activated="true" class="text:stem_porter" compatibility="5.3.002" expanded="true" height="60" name="Stem (Porter)" width="90" x="315" y="120"/>
          <operator activated="true" class="text:filter_stopwords_dictionary" compatibility="5.3.002" expanded="true" height="76" name="Filter Stopwords (2)" width="90" x="447" y="120">
            <parameter key="file" value="C:\Users\w44ylqrl\CitiZen_Java\org.xerox.citizen.categorizzation\data\correctedDomainWords.csv"/>
          </operator>
          <operator activated="true" class="text:filter_by_length" compatibility="5.3.002" expanded="true" height="60" name="Filter Tokens (3)" width="90" x="45" y="255">
            <parameter key="max_chars" value="15"/>
          </operator>
          <operator activated="true" class="text:generate_n_grams_terms" compatibility="5.3.002" expanded="true" height="60" name="Generate n-Grams (Terms)" width="90" x="447" y="255">
            <parameter key="max_length" value="3"/>
          </operator>
          <connect from_port="document" to_op="Filter Tokens (2)" to_port="document"/>
          <connect from_op="Filter Tokens (2)" from_port="document" to_op="Replace Tokens (2)" to_port="document"/>
          <connect from_op="Replace Tokens (2)" from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
          <connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Stopwords (Dictionary)" to_port="document"/>
          <connect from_op="Filter Stopwords (Dictionary)" from_port="document" to_op="Remove Document Parts" to_port="document"/>
          <connect from_op="Remove Document Parts" from_port="document" to_op="Stem (Porter)" to_port="document"/>
          <connect from_op="Stem (Porter)" from_port="document" to_op="Filter Stopwords (2)" to_port="document"/>
          <connect from_op="Filter Stopwords (2)" from_port="document" to_op="Filter Tokens (3)" to_port="document"/>
          <connect from_op="Filter Tokens (3)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
          <connect from_op="Generate n-Grams (Terms)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="5.3.013" expanded="true" height="76" name="Select Attributes" width="90" x="246" y="210">
        <parameter key="attribute_filter_type" value="no_missing_values"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="76" name="Set Role" width="90" x="380" y="210">
        <parameter key="attribute_name" value="label"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="x_validation" compatibility="5.3.013" expanded="true" height="130" name="Validation" width="90" x="313" y="480">
        <process expanded="true">
          <operator activated="true" class="support_vector_machine_linear" compatibility="5.3.013" expanded="true" height="76" name="SVM (Linear)" width="90" x="112" y="300"/>
          <connect from_port="training" to_op="SVM (Linear)" to_port="training set"/>
          <connect from_op="SVM (Linear)" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="5.3.013" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="5.3.013" expanded="true" height="76" name="Performance" width="90" x="174" y="30">
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
          <portSpacing port="sink_averagable 3" spacing="0"/>
        </process>
      </operator>
      <connect from_port="input 1" to_op="Process Documents from Files (2)" to_port="word list"/>
      <connect from_op="Process Documents from Files (2)" from_port="example set" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Process Documents from Files (2)" from_port="word list" to_port="result 1"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="model" to_port="result 2"/>
      <connect from_op="Validation" from_port="training" to_port="result 3"/>
      <connect from_op="Validation" from_port="averagable 1" to_port="result 4"/>
      <connect from_op="Validation" from_port="averagable 2" to_port="result 5"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="source_input 2" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
      <portSpacing port="sink_result 6" spacing="0"/>
    </process>
  </operator>
</process>

Comments

No comments on this post.