🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

Deep Learning Extension Problem with modified classification Word2Vec

User: "mmarag"
New Altair Community Member
Updated by Jocelyn

Hello, and congratulations on the good job integrating DeepLearning4j into RM.

I have tried to slightly modify the ready-made process "Classification of IMDB reviews using Word2Vec" so that it classifies BBC RSS feeds instead.

 

I have this error:

  • Exception: java.lang.NullPointerException
  • Message: null
  • Stack trace:
  • com.rapidminer.example.Example.getNominalValue(Example.java:97)
  • com.rapidminer.extension.deeplearning.tools.LabeledTextProvider.nextSentence(LabeledTextProvider.java:45)
  • org.deeplearning4j.iterator.CnnSentenceDataSetIterator.preLoadTokens(CnnSentenceDataSetIterator.java:211)
  • org.deeplearning4j.iterator.CnnSentenceDataSetIterator.hasNext(CnnSentenceDataSetIterator.java:201)
  • com.rapidminer.extension.deeplearning.ioobjects.TensorIOObject.<init>(TensorIOObject.java:62)
  • com.rapidminer.extension.deeplearning.operators.WordEmbeddingOperator.doWork(WordEmbeddingOperator.java:118)
  • com.rapidminer.operator.Operator.execute(Operator.java:1025)
  • com.rapidminer.operator.execution.SimpleUnitExecutor.execute(SimpleUnitExecutor.java:77)
  • com.rapidminer.operator.ExecutionUnit$2.run(ExecutionUnit.java:812)
  • com.rapidminer.operator.ExecutionUnit$2.run(ExecutionUnit.java:807)
  • java.security.AccessController.doPrivileged(Native Method)
  • com.rapidminer.operator.ExecutionUnit.execute(ExecutionUnit.java:807)
  • com.rapidminer.operator.OperatorChain.doWork(OperatorChain.java:428)
  • com.rapidminer.operator.Operator.execute(Operator.java:1025)
  • com.rapidminer.Process.execute(Process.java:1322)
  • com.rapidminer.Process.run(Process.java:1297)
  • com.rapidminer.Process.run(Process.java:1183)
  • com.rapidminer.Process.run(Process.java:1136)
  • com.rapidminer.Process.run(Process.java:1131)
  • com.rapidminer.Process.run(Process.java:1121)
  • com.rapidminer.gui.ProcessThread.run(ProcessThread.java:65)

My process XML looks like this:

 

<?xml version="1.0" encoding="UTF-8"?><process version="9.0.002">
  <!--
    RapidMiner process: two-class text classification of BBC RSS items.
    Data flow (see the <connect> elements at the end of the root operator):
      1. Two RSS feeds (business, technology) are read, reduced to the
         "Content" attribute, given a constant "class" value and a label role.
      2. Both sets are unioned, "Content" is converted from text to nominal,
         and the result is split 0.8 / 0.2 with shuffled sampling.
      3. Each partition is passed through a word-embedding operator that also
         receives the word2vec model file.
      4. A convolutional network is trained on the first tensor, applied to
         the second, and evaluated with a binominal performance operator.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.0.002" expanded="true" name="Process" origin="GENERATED_SAMPLE">
    <process expanded="true">
      <!-- Word2vec model file; Multiply duplicates it for the two embedding operators. -->
      <operator activated="true" class="open_file" compatibility="9.0.002" expanded="true" height="68" name="Open File" origin="GENERATED_SAMPLE" width="90" x="45" y="748">
        <parameter key="filename" value="C:\Users\mmara\Downloads\GoogleNews-vectors-negative300.bin.gz"/>
      </operator>
      <operator activated="true" class="multiply" compatibility="9.0.002" expanded="true" height="103" name="Multiply" origin="GENERATED_SAMPLE" width="90" x="179" y="646"/>
      <!-- Branch 1: business feed, keep only "Content", add class = "business", mark it as label. -->
      <operator activated="true" class="web:read_rss" compatibility="7.3.000" expanded="true" height="68" name="Read RSS Feed" width="90" x="45" y="34">
        <parameter key="url" value="http://feeds.bbci.co.uk/news/business/rss.xml"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="9.0.002" expanded="true" height="82" name="Select Attributes" width="90" x="45" y="136">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Content"/>
      </operator>
      <operator activated="true" class="generate_attributes" compatibility="9.0.002" expanded="true" height="82" name="Generate Attributes" width="90" x="179" y="34">
        <list key="function_descriptions">
          <parameter key="class" value="&quot;business&quot;"/>
        </list>
      </operator>
      <operator activated="true" class="set_role" compatibility="9.0.002" expanded="true" height="82" name="Set Role" width="90" x="179" y="136">
        <parameter key="attribute_name" value="class"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Branch 2: technology feed, same steps with class = "technology". -->
      <operator activated="true" class="web:read_rss" compatibility="7.3.000" expanded="true" height="68" name="Read RSS Feed (2)" width="90" x="45" y="340">
        <parameter key="url" value="http://feeds.bbci.co.uk/news/technology/rss.xml"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="9.0.002" expanded="true" height="82" name="Select Attributes (2)" width="90" x="45" y="442">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Content"/>
      </operator>
      <operator activated="true" class="generate_attributes" compatibility="9.0.002" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="246" y="340">
        <list key="function_descriptions">
          <parameter key="class" value="&quot;technology&quot;"/>
        </list>
      </operator>
      <operator activated="true" class="set_role" compatibility="9.0.002" expanded="true" height="82" name="Set Role (2)" width="90" x="179" y="493">
        <parameter key="attribute_name" value="class"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Merge both branches, then convert "Content" from text to nominal. -->
      <operator activated="true" class="union" compatibility="9.0.002" expanded="true" height="82" name="Union" width="90" x="112" y="238"/>
      <!-- breakpoints="after": execution pauses after this operator so its output can be inspected. -->
      <operator activated="true" breakpoints="after" class="text_to_nominal" compatibility="9.0.002" expanded="true" height="82" name="Text to Nominal" width="90" x="246" y="238">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Content"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <!-- Shuffled 80/20 split: partition 1 feeds the training embedding, partition 2 the testing embedding. -->
      <operator activated="true" class="split_data" compatibility="9.0.002" expanded="true" height="103" name="Split Data" origin="GENERATED_SAMPLE" width="90" x="380" y="289">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.8"/>
          <parameter key="ratio" value="0.2"/>
        </enumeration>
        <parameter key="sampling_type" value="shuffled sampling"/>
      </operator>
      <!--
        NOTE(review): the stack trace in the post fails inside this operator
        class (WordEmbeddingOperator.doWork, then Example.getNominalValue).
        Presumably the "Content" or "class" attribute is missing, has an
        unexpected type, or holds missing values at this point; verify the
        example set at the breakpoint after "Text to Nominal".
      -->
      <operator activated="true" class="deeplearning:dl4j_word_embedding" compatibility="0.8.000" expanded="true" height="82" name="Text to Numbers using Word2Vec" origin="GENERATED_SAMPLE" width="90" x="380" y="85">
        <parameter key="text_attribute" value="Content"/>
        <parameter key="label_attribute" value="class"/>
        <parameter key="max._sentence_length" value="10"/>
        <description align="center" color="transparent" colored="false" width="126">Convert training sentences to numbers.</description>
      </operator>
      <operator activated="true" class="deeplearning:dl4j_word_embedding" compatibility="0.8.000" expanded="true" height="82" name="Text to Numbers using Word2Vec (2)" origin="GENERATED_SAMPLE" width="90" x="447" y="493">
        <parameter key="text_attribute" value="Content"/>
        <parameter key="label_attribute" value="class"/>
        <parameter key="max._sentence_length" value="10"/>
        <description align="center" color="transparent" colored="false" width="126">Convert testing sentences to numbers.</description>
      </operator>
      <!--
        Convolutional network with an explicit input shape (infer_input_shape is false).
        height="10" equals max._sentence_length above and width="300" matches the
        "300" in the model file name; presumably these must agree (TODO confirm).
      -->
      <operator activated="true" class="deeplearning:dl4j_tensor_sequential_neural_network" compatibility="0.8.000" expanded="true" height="103" name="Deep Learning on Tensors" origin="GENERATED_SAMPLE" width="90" x="514" y="85">
        <parameter key="use_miniBatch" value="true"/>
        <parameter key="updater" value="Nesterovs"/>
        <parameter key="learning_rate" value="0.1"/>
        <parameter key="infer_input_shape" value="false"/>
        <parameter key="network_type" value="Convolutional"/>
        <parameter key="height" value="10"/>
        <parameter key="width" value="300"/>
        <parameter key="depth" value="1"/>
        <process expanded="true">
          <!-- Layer chain: convolution, global pooling, dense softmax output with 2 neurons. -->
          <!-- NOTE(review): the description below mentions a "3, 300" kernel, but kernel_size is "2.2"; confirm which is intended. -->
          <operator activated="true" class="deeplearning:dl4j_convolutional_layer" compatibility="0.8.000" expanded="true" height="68" name="Add Convolutional Layer" origin="GENERATED_SAMPLE" width="90" x="179" y="34">
            <parameter key="kernel_size" value="2.2"/>
            <parameter key="stride_size" value="1.1"/>
            <parameter key="layer_name" value="conv"/>
            <description align="center" color="transparent" colored="false" width="126">3, 300 Kernel --&amp;gt; 3 regular kernel size; 300 number of dimensions from Googles word2vec model</description>
          </operator>
          <operator activated="true" class="deeplearning:dl4j_global_pooling_layer" compatibility="0.8.000" expanded="true" height="68" name="Add Global Pooling Layer" origin="GENERATED_SAMPLE" width="90" x="380" y="34"/>
          <operator activated="true" class="deeplearning:dl4j_dense_layer" compatibility="0.8.000" expanded="true" height="68" name="Add Dense Layer" origin="GENERATED_SAMPLE" width="90" x="581" y="34">
            <parameter key="number_of_neurons" value="2"/>
            <parameter key="activation_function" value="Softmax"/>
            <description align="center" color="transparent" colored="false" width="126">2 classes --&amp;gt; 2 neurons with softmax</description>
          </operator>
          <connect from_port="layerArchitecture" to_op="Add Convolutional Layer" to_port="layerArchitecture"/>
          <connect from_op="Add Convolutional Layer" from_port="layerArchitecture" to_op="Add Global Pooling Layer" to_port="layerArchitecture"/>
          <connect from_op="Add Global Pooling Layer" from_port="layerArchitecture" to_op="Add Dense Layer" to_port="layerArchitecture"/>
          <connect from_op="Add Dense Layer" from_port="layerArchitecture" to_port="layerArchitecture"/>
          <portSpacing port="source_layerArchitecture" spacing="0"/>
          <portSpacing port="sink_layerArchitecture" spacing="0"/>
        </process>
      </operator>
      <!-- Score the test tensor with the trained model and compute performance. -->
      <operator activated="true" class="deeplearning:dl4j_apply_tensor_model" compatibility="0.8.000" expanded="true" height="82" name="Apply Model on Tensor" origin="GENERATED_SAMPLE" width="90" x="648" y="187"/>
      <operator activated="true" class="performance_binominal_classification" compatibility="9.0.002" expanded="true" height="82" name="Performance" origin="GENERATED_SAMPLE" width="90" x="782" y="187"/>
      <!-- Wiring: model file to both embedding operators. -->
      <connect from_op="Open File" from_port="file" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Text to Numbers using Word2Vec (2)" to_port="file with word2vec model"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Text to Numbers using Word2Vec" to_port="file with word2vec model"/>
      <!-- Wiring: feed branches into Union, then Text to Nominal and Split Data. -->
      <connect from_op="Read RSS Feed" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Union" to_port="example set 1"/>
      <connect from_op="Read RSS Feed (2)" from_port="output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
      <connect from_op="Set Role (2)" from_port="example set output" to_op="Union" to_port="example set 2"/>
      <connect from_op="Union" from_port="union" to_op="Text to Nominal" to_port="example set input"/>
      <connect from_op="Text to Nominal" from_port="example set output" to_op="Split Data" to_port="example set"/>
      <!-- Wiring: partitions to embeddings, tensors to training / scoring, result to output. -->
      <connect from_op="Split Data" from_port="partition 1" to_op="Text to Numbers using Word2Vec" to_port="example set"/>
      <connect from_op="Split Data" from_port="partition 2" to_op="Text to Numbers using Word2Vec (2)" to_port="example set"/>
      <connect from_op="Text to Numbers using Word2Vec" from_port="tensor" to_op="Deep Learning on Tensors" to_port="training set"/>
      <connect from_op="Text to Numbers using Word2Vec (2)" from_port="tensor" to_op="Apply Model on Tensor" to_port="unlabelled tensor"/>
      <connect from_op="Deep Learning on Tensors" from_port="model" to_op="Apply Model on Tensor" to_port="model"/>
      <connect from_op="Apply Model on Tensor" from_port="labeled data" to_op="Performance" to_port="labelled data"/>
      <connect from_op="Performance" from_port="performance" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process> 

 

 

I am using the 1.2 GB Google News word2vec file (GoogleNews-vectors-negative300.bin.gz) as the lexicon.

 

Regards

Manolis