How to split a sentence at the conjunction words it contains?

HeikoeWin786
HeikoeWin786 New Altair Community Member
edited November 5 in Community Q&A
Hi,

I want to split a sentence using the Split operator, but not with a regular expression.
I want the sentence to be split at conjunction words, e.g. and, but, however, yet.
E.g. The staff is bad, and the room is not good, but the food is great.
This sentence should be split as follows:
The staff is bad
the room is not good
the food is great

Can anyone suggest how I can achieve this in RapidMiner?

thanks a lot
Tagged:

Answers

  • Robi_Me
    Robi_Me New Altair Community Member
    edited January 2021
    <?xml version="1.0" encoding="UTF-8"?><process version="9.7.002">
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.7.002" expanded="true" name="Process">
        <!-- Parameters of the root "Process" operator: log verbosity, random seed,
             mail notification settings and encoding. -->
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Conjunction dictionary fed into the "dictionary" port of "Replace (Dictionary)".
               The comma separated text below defines two columns:
                 conjunction : the word to look for in the sentence
                 replace     : the same word prefixed with the marker character §
               The "Split" operator later cuts the text at every § marker, so each row
               must map a conjunction to § followed by that very same word; otherwise the
               wording of the sentence is altered or no split point is produced.
               Fixes compared with the earlier dictionary:
                 * "besides" now carries the § marker (it was mapped to itself, so it
                   never produced a split);
                 * otherwise, rather, accordingly, consequently, hence, meanwhile, then,
                   therefore and thus now map to themselves instead of to "§instead";
                 * "yet" was added.
               NOTE(review): "Replace (Dictionary)" is configured with
               use_regular_expressions=false, so entries presumably match as plain
               substrings and may also hit inside longer words (e.g. "and" in
               "standard"); confirm against your data. -->
          <operator activated="true" class="utility:create_exampleset" compatibility="9.7.002" expanded="true" height="68" name="Create ExampleSet" width="90" x="179" y="289">
            <parameter key="generator_type" value="comma separated text"/>
            <parameter key="number_of_examples" value="100"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions"/>
            <parameter key="add_id_attribute" value="false"/>
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>
            <!-- First CSV line is the header; rows are separated by &#10; character
                 references so the line breaks survive attribute value normalization. -->
            <parameter key="input_csv_text" value="conjunction,replace&#10;also,§also&#10;besides,§besides&#10;furthermore,§furthermore&#10;likewise,§likewise&#10;moreover,§moreover&#10;however,§however&#10;nevertheless,§nevertheless&#10;nonetheless,§nonetheless&#10;still,§still&#10;conversely,§conversely&#10;instead,§instead&#10;otherwise,§otherwise&#10;rather,§rather&#10;accordingly,§accordingly&#10;consequently,§consequently&#10;hence,§hence&#10;meanwhile,§meanwhile&#10;then,§then&#10;therefore,§therefore&#10;thus,§thus&#10;and,§and&#10;but,§but&#10;yet,§yet"/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!-- Sample input: generates one example whose "text" attribute holds the
               sentence that is to be split. -->
          <operator name="Create ExampleSet (2)"
                    class="utility:create_exampleset"
                    activated="true"
                    compatibility="9.7.002"
                    expanded="true"
                    x="179" y="136"
                    width="90" height="68">

            <!-- Generator: a single example built from attribute functions -->
            <parameter key="generator_type" value="attribute functions"/>
            <parameter key="number_of_examples" value="1"/>
            <parameter key="use_stepsize" value="false"/>
            <list key="function_descriptions">
              <!-- The value is a quoted string expression, hence the &quot; delimiters -->
              <parameter key="text" value="&quot;The staff is bad, and the room is not good, but the food is great.&quot;"/>
            </list>
            <parameter key="add_id_attribute" value="false"/>

            <!-- Series configuration lists (all empty) and date settings -->
            <list key="numeric_series_configuration"/>
            <list key="date_series_configuration"/>
            <list key="date_series_configuration (interval)"/>
            <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
            <parameter key="time_zone" value="SYSTEM"/>

            <!-- CSV settings.
                 NOTE(review): presumably unused while generator_type is
                 "attribute functions"; confirm before relying on them. -->
            <parameter key="input_csv_text" value="text&#10;The staff is bad, and the room is not good, but the food is great."/>
            <parameter key="column_separator" value=","/>
            <parameter key="parse_all_as_nominal" value="false"/>
            <parameter key="decimal_point_character" value="."/>
            <parameter key="trim_attribute_names" value="true"/>
          </operator>
          <!-- Marks every conjunction in the "text" attribute: values found in the
               dictionary column "conjunction" are replaced by the matching value of the
               dictionary column "replace". -->
          <operator name="Replace (Dictionary)"
                    class="replace_dictionary"
                    activated="true"
                    compatibility="9.7.002"
                    expanded="true"
                    x="380" y="187"
                    width="90" height="103">

            <parameter key="return_preprocessing_model" value="false"/>
            <parameter key="create_view" value="false"/>

            <!-- Attribute selection: only the single attribute "text" -->
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="true"/>

            <!-- Dictionary columns: search value and its replacement -->
            <parameter key="from_attribute" value="conjunction"/>
            <parameter key="to_attribute" value="replace"/>

            <!-- Matching options: no regular expressions, no lowercasing,
                 and not limited to the first match -->
            <parameter key="use_regular_expressions" value="false"/>
            <parameter key="convert_to_lowercase" value="false"/>
            <parameter key="first_match_only" value="false"/>
          </operator>
          <!-- Cuts the "text" attribute at every § marker that the preceding
               replacement step put in front of a conjunction. -->
          <operator name="Split"
                    class="split"
                    activated="true"
                    compatibility="9.7.002"
                    expanded="true"
                    x="514" y="187"
                    width="90" height="82">

            <!-- Attribute selection: only the single attribute "text" -->
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>

            <!-- Split at the marker character, using ordered split mode -->
            <parameter key="split_pattern" value="§"/>
            <parameter key="split_mode" value="ordered_split"/>
          </operator>
          <!-- Wiring: "Create ExampleSet" supplies the dictionary and
               "Create ExampleSet (2)" supplies the example set for "Replace (Dictionary)";
               its output goes into "Split", whose output is delivered at result port 1. -->
          <connect from_op="Create ExampleSet" from_port="output" to_op="Replace (Dictionary)" to_port="dictionary"/>
          <connect from_op="Create ExampleSet (2)" from_port="output" to_op="Replace (Dictionary)" to_port="example set input"/>
          <connect from_op="Replace (Dictionary)" from_port="example set output" to_op="Split" to_port="example set input"/>
          <connect from_op="Split" from_port="example set output" to_port="result 1"/>
          <!-- Layout spacing of the process ports -->
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
    </process>