Backward elimination, forward selection and optimize selection operators
I'm trying to use Backward elimination, forward selection and optimize selection operators to find useful predictors for scoring in my data.
I have 50 variables, 1500 samples.
How long will the Backward Elimination, Forward Selection and Optimize Selection operators take to run on my data? Are there any standard efficiency figures for these algorithms?
I have 50 variables, 1500 samples.
How long will the Backward Elimination, Forward Selection and Optimize Selection operators take to run on my data? Are there any standard efficiency figures for these algorithms?
Find more posts tagged with
Sort by:
1 - 8 of
81
I'm using a computer with 7300 MB of memory and an 8-core processor running at 2 GHz per core.
I'm using Logistic Regression in this way
Thanks, I'll try to use Naive Bayes.
I have already been waiting almost 6 days for the algorithm with Logistic Regression to finish.
I'm using Logistic Regression in this way:
Is this the right way?
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.0">
  <!--
    Process definition (version="5.0") with two top-level operators:
      1. "Backward Elimination" (class optimize_selection_backward), whose inner
         process evaluates each candidate attribute set with a split validation.
      2. "Validation (2)", a second split validation fed from the
         "example set" output of "Backward Elimination".
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" expanded="true" name="Process">
    <process expanded="true" height="-20" width="-50">

      <!-- ===== 1. Feature selection: backward elimination ===== -->
      <operator activated="true"
                class="optimize_selection_backward"
                expanded="true"
                height="94"
                name="Backward Elimination"
                width="90"
                x="150"
                y="48">
        <process expanded="true">
          <operator activated="true" class="split_validation" expanded="true" name="Validation">
            <parameter key="parallelize_training" value="true"/>
            <parameter key="parallelize_testing" value="true"/>

            <!-- Training subprocess of "Validation". -->
            <process expanded="true">
              <!-- The next three operators are deactivated (activated="false")
                   and are not referenced by any connection below. -->
              <operator activated="false" class="multiply" expanded="true" name="Multiply"/>
              <operator activated="false"
                        class="nominal_to_numerical"
                        expanded="true"
                        name="Nominal to Numerical (2)">
                <parameter key="create_view" value="true"/>
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="DEFSTATUS"/>
              </operator>
              <operator activated="false"
                        class="polynomial_regression"
                        expanded="true"
                        name="Polynomial Regression">
                <parameter key="use_local_random_seed" value="true"/>
              </operator>

              <!-- Active path: the training set is duplicated; one copy is
                   forwarded on "through 1", the other trains the learner. -->
              <operator activated="true" class="multiply" expanded="true" name="Multiply (2)"/>
              <!-- NOTE(review): kernel_type is "polynomial". A non-linear kernel
                   is presumably far more expensive to fit than a linear one and
                   may explain very long run times inside the elimination loop.
                   TODO confirm against the operator documentation. -->
              <operator activated="true"
                        class="logistic_regression"
                        expanded="true"
                        name="Logistic Regression">
                <parameter key="kernel_type" value="polynomial"/>
                <parameter key="convergence_epsilon" value="1.0E-5"/>
              </operator>

              <connect from_port="training" to_op="Multiply (2)" to_port="input"/>
              <connect from_op="Multiply (2)" from_port="output 1" to_port="through 1"/>
              <connect from_op="Multiply (2)"
                       from_port="output 2"
                       to_op="Logistic Regression"
                       to_port="training set"/>
              <connect from_op="Logistic Regression" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
              <portSpacing port="sink_through 2" spacing="0"/>
            </process>

            <!-- Testing subprocess of "Validation". -->
            <process expanded="true">
              <!-- Deactivated and not referenced by any connection below. -->
              <operator activated="false"
                        class="select_attributes"
                        expanded="true"
                        name="Select Attributes (5)">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes"
                           value="DEFSTATUS|AMOUNT_USD|MONTHLY_REPAYMENT_USD|TERM|DISB_FEE|MONTHLY_FEE|INTEREST_RATE|AGREEM*ENTER|OWNERSHIP*POST|ENTER"/>
                <parameter key="include_special_attributes" value="true"/>
              </operator>

              <!-- The model is duplicated and applied twice:
                     "Apply Model (3)" scores the data arriving on "through 1"
                       and feeds "Selection_Training" (sent to "averagable 2");
                     "Apply Model" scores the "test set"
                       and feeds "Test" (sent to "averagable 1"). -->
              <operator activated="true" class="multiply" expanded="true" name="Multiply (3)"/>
              <operator activated="true" class="apply_model" expanded="true" name="Apply Model (3)">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance" expanded="true" name="Selection_Training"/>
              <operator activated="true" class="apply_model" expanded="true" name="Apply Model">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="performance" expanded="true" name="Test"/>

              <connect from_port="model" to_op="Multiply (3)" to_port="input"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_port="through 1" to_op="Apply Model (3)" to_port="unlabelled data"/>
              <connect from_op="Multiply (3)"
                       from_port="output 1"
                       to_op="Apply Model"
                       to_port="model"/>
              <connect from_op="Multiply (3)"
                       from_port="output 2"
                       to_op="Apply Model (3)"
                       to_port="model"/>
              <connect from_op="Apply Model (3)"
                       from_port="labelled data"
                       to_op="Selection_Training"
                       to_port="labelled data"/>
              <connect from_op="Selection_Training" from_port="performance" to_port="averagable 2"/>
              <connect from_op="Apply Model"
                       from_port="labelled data"
                       to_op="Test"
                       to_port="labelled data"/>
              <connect from_op="Test" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="source_through 2" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
              <portSpacing port="sink_averagable 3" spacing="0"/>
            </process>
          </operator>

          <!-- NOTE(review): the performance handed back to "Backward Elimination"
               comes from "averagable 2", i.e. from "Selection_Training", which is
               computed on the "through 1" copy of the training data, not from
               "Test" on "averagable 1". This looks like attribute selection driven
               by training-set performance. Confirm that this is intended. -->
          <connect from_port="example set" to_op="Validation" to_port="training"/>
          <connect from_op="Validation" from_port="averagable 2" to_port="performance"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
        </process>
      </operator>

      <!-- ===== 2. Split validation on the output of the elimination ===== -->
      <operator activated="true"
                class="split_validation"
                expanded="true"
                height="130"
                name="Validation (2)"
                width="90"
                x="378"
                y="111">

        <!-- Training subprocess of "Validation (2)". -->
        <process expanded="true">
          <!-- The next three operators are deactivated (activated="false")
               and are not referenced by any connection below. -->
          <operator activated="false" class="multiply" expanded="true" name="Multiply (4)"/>
          <operator activated="false"
                    class="nominal_to_numerical"
                    expanded="true"
                    name="Nominal to Numerical (5)">
            <parameter key="create_view" value="true"/>
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="DEFSTATUS"/>
          </operator>
          <operator activated="false"
                    class="polynomial_regression"
                    expanded="true"
                    name="Polynomial Regression (2)">
            <parameter key="use_local_random_seed" value="true"/>
          </operator>

          <!-- Active path: same wiring and learner parameters as the training
               subprocess of "Validation" inside "Backward Elimination". -->
          <operator activated="true" class="multiply" expanded="true" name="Multiply (5)"/>
          <operator activated="true"
                    class="logistic_regression"
                    expanded="true"
                    name="Logistic Regression (2)">
            <parameter key="kernel_type" value="polynomial"/>
            <parameter key="convergence_epsilon" value="1.0E-5"/>
          </operator>

          <connect from_port="training" to_op="Multiply (5)" to_port="input"/>
          <connect from_op="Multiply (5)" from_port="output 1" to_port="through 1"/>
          <connect from_op="Multiply (5)"
                   from_port="output 2"
                   to_op="Logistic Regression (2)"
                   to_port="training set"/>
          <connect from_op="Logistic Regression (2)" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <portSpacing port="sink_through 2" spacing="0"/>
        </process>

        <!-- Testing subprocess of "Validation (2)". -->
        <process expanded="true">
          <!-- Deactivated and not referenced by any connection below. -->
          <operator activated="false"
                    class="select_attributes"
                    expanded="true"
                    name="Select Attributes (7)">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes"
                       value="DEFSTATUS|AMOUNT_USD|MONTHLY_REPAYMENT_USD|TERM|DISB_FEE|MONTHLY_FEE|INTEREST_RATE|AGREEM*ENTER|OWNERSHIP*POST|ENTER"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>

          <!-- "Apply Model (4)" scores the "through 1" data for "Training (2)"
               (sent to "averagable 2"); "Apply Model (5)" scores the "test set"
               for "Test (2)" (sent to "averagable 1"). -->
          <operator activated="true" class="multiply" expanded="true" name="Multiply (6)"/>
          <operator activated="true" class="apply_model" expanded="true" name="Apply Model (4)">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance" expanded="true" name="Training (2)"/>
          <operator activated="true" class="apply_model" expanded="true" name="Apply Model (5)">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance" expanded="true" name="Test (2)"/>

          <connect from_port="model" to_op="Multiply (6)" to_port="input"/>
          <connect from_port="test set" to_op="Apply Model (5)" to_port="unlabelled data"/>
          <connect from_port="through 1" to_op="Apply Model (4)" to_port="unlabelled data"/>
          <connect from_op="Multiply (6)"
                   from_port="output 1"
                   to_op="Apply Model (5)"
                   to_port="model"/>
          <connect from_op="Multiply (6)"
                   from_port="output 2"
                   to_op="Apply Model (4)"
                   to_port="model"/>
          <connect from_op="Apply Model (4)"
                   from_port="labelled data"
                   to_op="Training (2)"
                   to_port="labelled data"/>
          <connect from_op="Training (2)" from_port="performance" to_port="averagable 2"/>
          <connect from_op="Apply Model (5)"
                   from_port="labelled data"
                   to_op="Test (2)"
                   to_port="labelled data"/>
          <connect from_op="Test (2)" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="source_through 2" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
          <portSpacing port="sink_averagable 3" spacing="0"/>
        </process>
      </operator>

      <!-- Top-level wiring.
             result 1: attribute weights from "Backward Elimination"
             result 2: performance from "Backward Elimination"
             result 3: "averagable 1" of "Validation (2)" (from "Test (2)")
             result 4: "averagable 2" of "Validation (2)" (from "Training (2)")
             result 5: model from "Validation (2)" -->
      <connect from_port="input 1" to_op="Backward Elimination" to_port="example set"/>
      <connect from_op="Backward Elimination"
               from_port="example set"
               to_op="Validation (2)"
               to_port="training"/>
      <connect from_op="Backward Elimination" from_port="attribute weights" to_port="result 1"/>
      <connect from_op="Backward Elimination" from_port="performance" to_port="result 2"/>
      <connect from_op="Validation (2)" from_port="model" to_port="result 5"/>
      <connect from_op="Validation (2)" from_port="averagable 1" to_port="result 3"/>
      <connect from_op="Validation (2)" from_port="averagable 2" to_port="result 4"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="source_input 2" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
      <portSpacing port="sink_result 6" spacing="0"/>
    </process>
  </operator>
</process>
Thanks, I'll try to use Naive Bayes.
I have already been waiting almost 6 days for the algorithm with Logistic Regression to finish.
Kind regards,
Tobias