🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

( Clustering) with (association rules)

User: "Ebte"
New Altair Community Member
Updated by Jocelyn
How can I combine clustering with association rules in RapidMiner?
Sort by:
1 - 1 of 11
    User: "David_A"
    New Altair Community Member
    Accepted Answer
    Hi @Ebte ,

    in that case you can do the following:

    Run the clustering with any algorithm you like (for example k-means or x-means, if you don't know a good number of clusters in advance).

    Then you use the Loop Values operator to iterate over the different values of the new cluster attribute. Inside the loop you use the Filter Examples operator to select only those examples that lie in the same cluster; to do that, you have to use the iteration_macro (check this video if you haven't used macros before: https://academy.rapidminer.com/learn/video/macros-introduction).
    Then on those subsets you can create your association rules.

    Check this sample process below as an example.

    I hope that helps,
    David



    <?xml version="1.0" encoding="UTF-8"?><process version="9.1.000">
      <!--
        Example process: association rules mined separately for each cluster.
        Pipeline: Retrieve Iris, k-means (k=3), discretize numeric attributes,
        convert nominal attributes to binominal, then loop over every value of the
        "cluster" attribute and run FP-Growth + Create Association Rules on the
        examples of that cluster only.
      -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Step 1: load the Iris sample data set from the repository. -->
          <operator activated="true" class="retrieve" compatibility="9.1.000" expanded="true" height="68" name="Iris" origin="GENERATED_TUTORIAL" width="90" x="45" y="136">
            <parameter key="repository_entry" value="//Samples/data/Iris"/>
          </operator>
          <!-- Step 2: k-means with k=3; add_cluster_attribute=true adds the "cluster" attribute that the loop below iterates over. -->
          <operator activated="true" class="concurrency:k_means" compatibility="9.1.000" expanded="true" height="82" name="Clustering" width="90" x="179" y="136">
            <parameter key="add_cluster_attribute" value="true"/>
            <parameter key="add_as_label" value="false"/>
            <parameter key="remove_unlabeled" value="false"/>
            <parameter key="k" value="3"/>
            <parameter key="max_runs" value="10"/>
            <parameter key="determine_good_start_values" value="true"/>
            <parameter key="measure_types" value="NumericalMeasures"/>
            <parameter key="mixed_measure" value="MixedEuclideanDistance"/>
            <parameter key="nominal_measure" value="NominalDistance"/>
            <parameter key="numerical_measure" value="EuclideanDistance"/>
            <parameter key="divergence" value="SquaredEuclideanDistance"/>
            <parameter key="kernel_type" value="radial"/>
            <parameter key="kernel_gamma" value="1.0"/>
            <parameter key="kernel_sigma1" value="1.0"/>
            <parameter key="kernel_sigma2" value="0.0"/>
            <parameter key="kernel_sigma3" value="2.0"/>
            <parameter key="kernel_degree" value="3.0"/>
            <parameter key="kernel_shift" value="1.0"/>
            <parameter key="kernel_a" value="1.0"/>
            <parameter key="kernel_b" value="0.0"/>
            <parameter key="max_optimization_steps" value="100"/>
            <parameter key="use_local_random_seed" value="false"/>
            <parameter key="local_random_seed" value="1992"/>
            <description align="center" color="green" colored="true" width="126">Make any type of clustering.&lt;br&gt;This creates a new attribute:&lt;br&gt;&amp;quot;cluster&amp;quot;</description>
          </operator>
          <!--
            Step 3: split numeric attributes into 5 equal-frequency bins so they become nominal.
            NOTE(review): include_special_attributes is false, so the "cluster" attribute is
            presumably left untouched (assuming it carries a special role); confirm in the results.
          -->
          <operator activated="true" class="discretize_by_frequency" compatibility="7.1.001" expanded="true" height="103" name="Discretize by Frequency" origin="GENERATED_TUTORIAL" width="90" x="380" y="136">
            <parameter key="return_preprocessing_model" value="false"/>
            <parameter key="create_view" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="numeric"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="real"/>
            <parameter key="block_type" value="value_series"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_series_end"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="use_sqrt_of_examples" value="false"/>
            <parameter key="number_of_bins" value="5"/>
            <parameter key="range_name_type" value="short"/>
            <parameter key="automatic_number_of_digits" value="true"/>
            <parameter key="number_of_digits" value="-1"/>
          </operator>
          <!-- Step 4: turn the nominal (binned) attributes into binominal columns, the input format FPGrowth is configured for below. -->
          <operator activated="true" class="nominal_to_binominal" compatibility="7.1.001" expanded="true" height="103" name="Nominal to Binominal" origin="GENERATED_TUTORIAL" width="90" x="514" y="136">
            <parameter key="return_preprocessing_model" value="false"/>
            <parameter key="create_view" value="false"/>
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="file_path"/>
            <parameter key="block_type" value="single_value"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="single_value"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="transform_binominal" value="true"/>
            <parameter key="use_underscore_in_name" value="true"/>
          </operator>
          <!-- Step 5: run the inner process once per distinct value of "cluster"; the current value is exposed as the macro %{loop_value}. -->
          <operator activated="true" class="concurrency:loop_values" compatibility="9.1.000" expanded="true" height="82" name="Loop Values" width="90" x="715" y="136">
            <parameter key="attribute" value="cluster"/>
            <parameter key="iteration_macro" value="loop_value"/>
            <parameter key="reuse_results" value="false"/>
            <parameter key="enable_parallel_execution" value="true"/>
            <process expanded="true">
              <!-- 5a: keep only the examples whose "cluster" value equals the current loop value. -->
              <operator activated="true" class="filter_examples" compatibility="9.1.000" expanded="true" height="103" name="Filter Examples" width="90" x="112" y="34">
                <parameter key="parameter_expression" value=""/>
                <parameter key="condition_class" value="custom_filters"/>
                <parameter key="invert_filter" value="false"/>
                <list key="filters_list">
                  <parameter key="filters_entry_key" value="cluster.equals.%{loop_value}"/>
                </list>
                <parameter key="filters_logic_and" value="true"/>
                <parameter key="filters_check_metadata" value="true"/>
                <description align="center" color="purple" colored="true" width="126">Select only those examples that belong to a certain cluster.&lt;br&gt;See the filter function where during run time the macro %{loop_value}&lt;br/&gt;is replaced with &amp;quot;cluster_0&amp;quot;, &amp;quot;cluster_1&amp;quot; and &amp;quot;cluster_2&amp;quot;</description>
              </operator>
              <!-- 5b: mine frequent item sets from the dummy-coded columns of this cluster's subset (minimum support 0.1). -->
              <operator activated="true" class="concurrency:fp_growth" compatibility="9.1.000" expanded="true" height="82" name="FPGrowth" origin="GENERATED_TUTORIAL" width="90" x="447" y="85">
                <parameter key="input_format" value="items in dummy coded columns"/>
                <parameter key="item_separators" value="|"/>
                <parameter key="use_quotes" value="false"/>
                <parameter key="quotes_character" value="&quot;"/>
                <parameter key="escape_character" value="\"/>
                <parameter key="trim_item_names" value="true"/>
                <parameter key="min_requirement" value="support"/>
                <parameter key="min_support" value="0.1"/>
                <parameter key="min_frequency" value="100"/>
                <parameter key="min_items_per_itemset" value="1"/>
                <parameter key="max_items_per_itemset" value="0"/>
                <parameter key="max_number_of_itemsets" value="1000000"/>
                <parameter key="find_min_number_of_itemsets" value="false"/>
                <parameter key="min_number_of_itemsets" value="1"/>
                <parameter key="max_number_of_retries" value="15"/>
                <parameter key="requirement_decrease_factor" value="0.9"/>
                <enumeration key="must_contain_list"/>
              </operator>
              <!-- 5c: derive association rules from the frequent item sets (criterion: confidence, minimum 0.8). -->
              <operator activated="true" class="create_association_rules" compatibility="9.1.000" expanded="true" height="82" name="Create Association Rules" origin="GENERATED_TUTORIAL" width="90" x="581" y="85">
                <parameter key="criterion" value="confidence"/>
                <parameter key="min_confidence" value="0.8"/>
                <parameter key="min_criterion_value" value="0.8"/>
                <parameter key="gain_theta" value="2.0"/>
                <parameter key="laplace_k" value="1.0"/>
              </operator>
              <!-- Inner wiring: loop input, then Filter Examples, FPGrowth, Create Association Rules, then loop output. -->
              <connect from_port="input 1" to_op="Filter Examples" to_port="example set input"/>
              <connect from_op="Filter Examples" from_port="example set output" to_op="FPGrowth" to_port="example set"/>
              <connect from_op="FPGrowth" from_port="frequent sets" to_op="Create Association Rules" to_port="item sets"/>
              <connect from_op="Create Association Rules" from_port="rules" to_port="output 1"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
              <description align="center" color="green" colored="true" height="106" resized="true" width="306" x="433" y="203">Create the association rules</description>
            </process>
            <description align="center" color="red" colored="true" width="126">Loop or iterate over the different values for the &amp;quot;cluster&amp;quot; attribute.&lt;br&gt;The iteration macro is then used to filter the examples inside</description>
          </operator>
          <!-- Outer wiring: Iris, Clustering, Discretize by Frequency, Nominal to Binominal, Loop Values, then the process result port. -->
          <connect from_op="Iris" from_port="output" to_op="Clustering" to_port="example set"/>
          <connect from_op="Clustering" from_port="clustered set" to_op="Discretize by Frequency" to_port="example set input"/>
          <connect from_op="Discretize by Frequency" from_port="example set output" to_op="Nominal to Binominal" to_port="example set input"/>
          <connect from_op="Nominal to Binominal" from_port="example set output" to_op="Loop Values" to_port="input 1"/>
          <connect from_op="Loop Values" from_port="output 1" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="90"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <description align="center" color="yellow" colored="false" height="96" resized="true" width="230" x="388" y="249">Just create some more attributes,&lt;br/&gt;by splitting the numerical values into ranges</description>
        </process>
      </operator>
    </process>