Trouble with Optimizing a Model

User: "sum_one"
New Altair Community Member
Updated by Jocelyn
Dear RM Community,

I am currently engaged in a project to better understand account retention.  I would like to create a decision tree model that intelligently categorizes the types of customers that close their accounts after 12 months.  This will hopefully give us a better look at which customers we might need to reach out to in order to retain their business.  However, along the way, I have hit some bumps in the road with my coding and am in need of some help.  I have heard great things about this community and its helpfulness, so hopefully we can work together to push past this roadblock.

My issues, broken down into three segments, lead into one another and are as follows:

Optimized Parameters
  • Using the Optimize Parameters (Evolutionary) process, I was able to determine the optimal parameters to maximize weighted mean recall and weighted mean precision
  • Issue: I want weighted mean recall and weighted mean precision to be optimized at the same time and not have a main criterion (not a big issue if this cannot be done so long as both weighted means are significant)

Replication
  • After I determined the optimized parameters, I attempted to recreate the same model that produced the optimized results by using the optimized parameters in a new decision tree
      o To my understanding, theoretically this should produce a similar tree as well as similar results but will vary because the training set will be slightly different
      o Results and tree change drastically for the worse
  • Issue: I want to be able to find an optimized model and be able to replicate/create a new model for future use

Accuracy
  • Now that I have partially replicated the optimized model and measured its performance, the overall strength of the model has decreased
  • Issue: the model needs to be able to be replicated and perform just as well as when it was optimized

Thank you for any and all help.  I greatly appreciate it.

Cheers,

Jerrod



<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="6.3.000">
  <!-- Process context: the input, output and macros entries are all empty. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.3.000" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="read_csv" compatibility="6.3.000" expanded="true" height="60" name="Read CSV" width="90" x="112" y="30">
        <parameter key="csv_file" value="Z:\Marketing\Staff\Jerrod Johnson\MOB18_v2.csv"/>
        <parameter key="column_separators" value=","/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="windows-1252"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Region.true.polynominal.attribute"/>
          <parameter key="1" value="Line Of Business.true.polynominal.attribute"/>
          <parameter key="2" value="Company Type.true.polynominal.attribute"/>
          <parameter key="3" value="Revenue (US Dollars; million).true.real.attribute"/>
          <parameter key="4" value="Total Employees.true.real.attribute"/>
          <parameter key="5" value="NAICS DESC (JH).true.polynominal.attribute"/>
          <parameter key="6" value="ProductClass.true.polynominal.attribute"/>
          <parameter key="7" value="StatusOpen.true.real.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="6.3.000" expanded="true" height="76" name="Select Attributes" width="90" x="112" y="120">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="Company Type|NAICS DESC (JH)|Region|Revenue (US Dollars; million)|StatusOpen|Total Employees|Line Of Business|ProductClass"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="6.3.000" expanded="true" height="76" name="Set Role" width="90" x="112" y="255">
        <parameter key="attribute_name" value="StatusOpen"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="numerical_to_polynominal" compatibility="6.3.000" expanded="true" height="76" name="Numerical to Polynominal" width="90" x="112" y="390">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="StatusOpen"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <operator activated="true" class="optimize_parameters_evolutionary" compatibility="6.3.000" expanded="true" height="112" name="Optimize Parameters (Evolutionary)" width="90" x="380" y="165">
        <list key="parameters">
          <parameter key="Learner Evo.maximal_depth" value="[1;20]"/>
          <parameter key="Learner Evo.confidence" value="[1.0E-7;0.5]"/>
          <parameter key="Learner Evo.minimal_gain" value="[0.005;.15]"/>
          <parameter key="Learner Evo.minimal_leaf_size" value="[1;8]"/>
          <parameter key="Learner Evo.minimal_size_for_split" value="[1.0;6]"/>
          <parameter key="Learner Evo.number_of_prepruning_alternatives" value="[1;6]"/>
          <parameter key="Validation Evo.split_ratio" value="[0.6;.9]"/>
        </list>
        <parameter key="max_generations" value="100"/>
        <parameter key="population_size" value="50"/>
        <parameter key="use_local_random_seed" value="true"/>
        <parameter key="show_convergence_plot" value="true"/>
        <process expanded="true">
          <operator activated="true" class="split_validation" compatibility="6.3.000" expanded="true" height="112" name="Validation Evo" width="90" x="447" y="30">
            <parameter key="split_ratio" value="0.8931198308911473"/>
            <process expanded="true">
              <operator activated="true" class="parallel_decision_tree" compatibility="6.3.000" expanded="true" height="76" name="Learner Evo" width="90" x="179" y="30">
                <parameter key="criterion" value="information_gain"/>
                <parameter key="maximal_depth" value="14"/>
                <parameter key="confidence" value="0.06462070743893004"/>
                <parameter key="minimal_gain" value="0.005"/>
                <parameter key="minimal_leaf_size" value="4"/>
                <parameter key="minimal_size_for_split" value="3"/>
                <parameter key="number_of_prepruning_alternatives" value="6"/>
              </operator>
              <connect from_port="training" to_op="Learner Evo" to_port="training set"/>
              <connect from_op="Learner Evo" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <process expanded="true">
              <operator activated="true" class="apply_model" compatibility="6.3.000" expanded="true" height="76" name="Apply Model (6)" width="90" x="112" y="30">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance_classification" compatibility="6.3.000" expanded="true" height="76" name="Performance (6)" width="90" x="246" y="30">
                <parameter key="accuracy" value="false"/>
                <parameter key="weighted_mean_recall" value="true"/>
                <parameter key="weighted_mean_precision" value="true"/>
                <list key="class_weights"/>
              </operator>
              <connect from_port="model" to_op="Apply Model (6)" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model (6)" to_port="unlabelled data"/>
              <connect from_op="Apply Model (6)" from_port="labelled data" to_op="Performance (6)" to_port="labelled data"/>
              <connect from_op="Performance (6)" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <connect from_port="input 1" to_op="Validation Evo" to_port="training"/>
          <connect from_op="Validation Evo" from_port="model" to_port="result 1"/>
          <connect from_op="Validation Evo" from_port="averagable 1" to_port="performance"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="set_parameters" compatibility="6.3.000" expanded="true" height="76" name="Set Parameters" width="90" x="514" y="75">
        <list key="name_map">
          <parameter key="Learner Evo" value="OptimalLearner"/>
          <parameter key="Validation Evo" value="OptimValidation"/>
        </list>
      </operator>
      <operator activated="true" class="read_csv" compatibility="6.3.000" expanded="true" height="60" name="Read CSV (3)" width="90" x="380" y="570">
        <parameter key="csv_file" value="Z:\Marketing\Staff\Jerrod Johnson\MOB18_v2.csv"/>
        <parameter key="column_separators" value=","/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="windows-1252"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Region.true.polynominal.attribute"/>
          <parameter key="1" value="Line Of Business.true.polynominal.attribute"/>
          <parameter key="2" value="Company Type.true.polynominal.attribute"/>
          <parameter key="3" value="Revenue (US Dollars; million).true.real.attribute"/>
          <parameter key="4" value="Total Employees.true.real.attribute"/>
          <parameter key="5" value="NAICS DESC (JH).true.polynominal.attribute"/>
          <parameter key="6" value="ProductClass.true.polynominal.attribute"/>
          <parameter key="7" value="StatusOpen.true.real.attribute"/>
        </list>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="6.3.000" expanded="true" height="76" name="Select Attributes (3)" width="90" x="380" y="480">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="Company Type|NAICS DESC (JH)|Region|Revenue (US Dollars; million)|StatusOpen|Total Employees|Line Of Business|ProductClass"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="6.3.000" expanded="true" height="76" name="Set Role (2)" width="90" x="380" y="390">
        <parameter key="attribute_name" value="StatusOpen"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <operator activated="true" class="numerical_to_polynominal" compatibility="6.3.000" expanded="true" height="76" name="Numerical to Polynominal (2)" width="90" x="380" y="300">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="StatusOpen"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <operator activated="true" class="split_validation" compatibility="6.3.000" expanded="true" height="112" name="OptimValidation" width="90" x="514" y="300">
        <parameter key="split_ratio" value="0.8931198308911473"/>
        <process expanded="true">
          <operator activated="true" class="parallel_decision_tree" compatibility="6.3.000" expanded="true" height="76" name="OptimalLearner" width="90" x="179" y="30">
            <parameter key="maximal_depth" value="14"/>
            <parameter key="confidence" value="0.06462070743893004"/>
            <parameter key="minimal_gain" value="0.005"/>
            <parameter key="minimal_leaf_size" value="4"/>
            <parameter key="minimal_size_for_split" value="3"/>
            <parameter key="number_of_prepruning_alternatives" value="6"/>
          </operator>
          <connect from_port="training" to_op="OptimalLearner" to_port="training set"/>
          <connect from_op="OptimalLearner" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="6.3.000" expanded="true" height="76" name="Apply Model (12)" width="90" x="45" y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_classification" compatibility="6.3.000" expanded="true" height="76" name="Performance (12)" width="90" x="313" y="30">
            <parameter key="accuracy" value="false"/>
            <parameter key="weighted_mean_recall" value="true"/>
            <parameter key="weighted_mean_precision" value="true"/>
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model (12)" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model (12)" to_port="unlabelled data"/>
          <connect from_op="Apply Model (12)" from_port="labelled data" to_op="Performance (12)" to_port="labelled data"/>
          <connect from_op="Performance (12)" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>
      <!-- Optimization branch: Read CSV, Select Attributes, Set Role, Numerical to Polynominal, then the optimizer. -->
      <connect from_op="Read CSV" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Numerical to Polynominal" to_port="example set input"/>
      <connect from_op="Numerical to Polynominal" from_port="example set output" to_op="Optimize Parameters (Evolutionary)" to_port="input 1"/>
      <!-- Optimizer outputs: performance to result 4, parameter set to Set Parameters (then result 5), model to result 6. -->
      <connect from_op="Optimize Parameters (Evolutionary)" from_port="performance" to_port="result 4"/>
      <connect from_op="Optimize Parameters (Evolutionary)" from_port="parameter" to_op="Set Parameters" to_port="parameter set"/>
      <connect from_op="Optimize Parameters (Evolutionary)" from_port="result 1" to_port="result 6"/>
      <connect from_op="Set Parameters" from_port="parameter set" to_port="result 5"/>
      <!-- Replication branch: a second, identical preprocessing chain feeding OptimValidation. -->
      <connect from_op="Read CSV (3)" from_port="output" to_op="Select Attributes (3)" to_port="example set input"/>
      <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
      <connect from_op="Set Role (2)" from_port="example set output" to_op="Numerical to Polynominal (2)" to_port="example set input"/>
      <connect from_op="Numerical to Polynominal (2)" from_port="example set output" to_op="OptimValidation" to_port="training"/>
      <!-- Replica outputs: model to result 1, training data to result 2, performance to result 3. -->
      <connect from_op="OptimValidation" from_port="model" to_port="result 1"/>
      <connect from_op="OptimValidation" from_port="training" to_port="result 2"/>
      <connect from_op="OptimValidation" from_port="averagable 1" to_port="result 3"/>
      <!-- Port layout spacing for the process canvas; every spacing value is 0. -->
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
      <portSpacing port="sink_result 6" spacing="0"/>
      <portSpacing port="sink_result 7" spacing="0"/>
    </process>
  </operator>
</process>

Find more posts tagged with