🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

Making Label an Attribute - Decision Tree Process

User: "Eric005"
New Altair Community Member
Updated by Jocelyn

Hi all,


I'm currently working on a presentation piece that uses time series data to build a binary classifier of stock market direction. I generate a custom attribute that gives a True/False (Up/Down) indication of a forward market price (this is the column label2), using actual forward data in the series, and my standard label attribute is then the value predicted by a boosted decision tree.  Here is my question: when I select attributes as the final step before the validation, I select the market date and market data, and this generally produces about 74% accuracy.  If I also select the label as an attribute, it produces 98% prediction accuracy (which to me is absurd).  So I'm trying to understand the mechanics of why listing the label as an attribute changes the predictions so radically: is the decision tree using previous predictions, through the windowing function, to influence forward predictions in a sort of looping system?  Does any of this make sense?

Feedback welcome.  XML below.   

 

Thanks!

Eric

 

<?xml version="1.0" encoding="UTF-8"?><process version="7.4.000">
<!-- Process context: no input bindings, output bindings or macros are defined. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<operator activated="true" class="process" compatibility="7.4.000" expanded="true" name="Process">
<process expanded="true">
<!-- Deactivated scratch operators (all activated="false"). None of them is
     referenced by a <connect> element of the top-level process, so they only
     occupy space on the canvas. -->
<operator activated="false" class="concurrency:parallel_decision_tree" compatibility="7.4.000" expanded="true" height="82" name="Decision Tree" width="90" x="782" y="391"/>
<!-- Sliding-window validation with empty training and testing subprocesses. -->
<operator activated="false" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="103" name="Validation" width="90" x="782" y="289">
  <process expanded="true">
    <portSpacing port="source_training" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
  </process>
  <process expanded="true">
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="sink_averagable 1" spacing="0"/>
  </process>
</operator>
<operator activated="false" class="linear_regression" compatibility="7.4.000" expanded="true" height="103" name="Linear Regression" width="90" x="514" y="187">
  <parameter key="feature_selection" value="greedy"/>
  <parameter key="eliminate_colinear_features" value="false"/>
</operator>
<operator activated="false" class="neural_net" compatibility="7.4.000" expanded="true" height="82" name="Neural Net" width="90" x="514" y="289">
  <list key="hidden_layers"/>
  <parameter key="decay" value="true"/>
</operator>
<!-- Bagging wrapper whose inner subprocess contains no learner. -->
<operator activated="false" class="bagging" compatibility="7.4.000" expanded="true" height="82" name="Bagging" width="90" x="514" y="391">
  <process expanded="true">
    <portSpacing port="source_training set" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
  </process>
</operator>
<operator activated="false" class="h2o:gradient_boosted_trees" compatibility="7.4.000" expanded="true" height="103" name="Gradient Boosted Trees" width="90" x="514" y="85">
  <list key="expert_parameters"/>
</operator>
<operator activated="false" class="apply_model" compatibility="7.4.000" expanded="true" height="82" name="Apply Model" width="90" x="648" y="34">
  <list key="application_parameters"/>
</operator>
<!-- Binominal performance with the listed criteria switched on. -->
<operator activated="false" class="performance_binominal_classification" compatibility="7.4.000" expanded="true" height="82" name="Performance (2)" width="90" x="782" y="187">
  <parameter key="classification_error" value="true"/>
  <parameter key="precision" value="true"/>
  <parameter key="false_positive" value="true"/>
  <parameter key="false_negative" value="true"/>
  <parameter key="true_positive" value="true"/>
  <parameter key="true_negative" value="true"/>
  <parameter key="positive_predictive_value" value="true"/>
  <parameter key="negative_predictive_value" value="true"/>
</operator>
<!-- Entry point of the data flow: loads the repository entry named below
     (path is relative to the location of this process). -->
<operator activated="true" class="retrieve" compatibility="7.4.000" expanded="true" height="68" name="Retrieve Date_NDX_SPX_VIX_RUT_DJX_HOLC Data (2)" width="90" x="45" y="238">
  <parameter key="repository_entry" value="../Date_NDX_SPX_VIX_RUT_DJX_HOLC Data"/>
</operator>
<!-- Third-party operator (class quantx1:security_return_operator, extension
     version 1.0.006) applied to the pipe-separated price columns below.
     NOTE(review): what it computes is not visible in this file; the operator
     name suggests differencing / returns. Confirm against the extension. -->
<operator activated="true" class="quantx1:security_return_operator" compatibility="1.0.006" expanded="true" height="68" name="Differencing" width="90" x="45" y="340">
  <parameter key="Price columns" value="VIX Open|VIX Low|VIX High|VIX Close|SPX Open|SPX Low|SPX High|SPX Close|RUT Open|RUT Low|RUT High|RUT Close|NDX Open|NDX Low|NDX High|NDX Close|DJX Open|DJX Low|DJX High|DJX Close"/>
</operator>
<!-- Subset filter naming the four SPX price columns plus Date; the VIX, RUT,
     NDX and DJX columns are not listed here. -->
<operator activated="true" class="select_attributes" compatibility="7.4.000" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="85">
  <parameter key="attribute_filter_type" value="subset"/>
  <parameter key="attributes" value="SPX Close|SPX High|SPX Low|SPX Open|Date"/>
</operator>
<!-- Marks Date as the example id and leaves SPX Close as a regular attribute.
     This operator previously assigned SPX Close the role "label" twice and
     then "regular" in the same additional-roles list, which is contradictory
     and order-dependent. The last entry ("regular") is kept as the intended
     outcome: Generate Attributes later reads [SPX Close-0], and Windowing
     builds its own label from SPX Close via create_label / label_attribute.
     NOTE(review): assumes additional roles were applied in list order so the
     previous net effect was already "regular". Confirm by comparing the
     Windowing output before and after this change. -->
<operator activated="true" class="set_role" compatibility="7.4.000" expanded="true" height="82" name="Set Role" width="90" x="313" y="85">
  <parameter key="attribute_name" value="Date"/>
  <parameter key="target_role" value="id"/>
  <list key="set_additional_roles"/>
</operator>
<!-- Series windowing: window_size 10, horizon 5, with create_label enabled and
     SPX Close named as the label source (label_dimension 0).
     NOTE(review): downstream operators reference columns such as
     [SPX Close-0] and `label`; presumably both are produced here (windowed
     copies named attribute-offset, plus the created label). Confirm on the
     operator output. -->
<operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing" width="90" x="179" y="187">
  <parameter key="window_size" value="10"/>
  <parameter key="create_label" value="true"/>
  <parameter key="label_attribute" value="SPX Close"/>
  <parameter key="label_dimension" value="0"/>
  <parameter key="horizon" value="5"/>
</operator>
<!-- Derives two columns from the windowed example set:
       label2: TRUE when `label` is greater than [SPX Close-0], else FALSE.
               Because label2 is a pure function of `label`, `label` must
               never be offered to a learner as a regular attribute.
       label3: sum of seven windowed SPX Close values divided by 7.
     NOTE(review): label3 adds offsets 0,1,2,4,5,6,7 and skips [SPX Close-3].
     This looks like an omission; confirm the intended averaging window. -->
<operator activated="true" class="generate_attributes" compatibility="7.4.000" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="187">
  <list key="function_descriptions">
    <parameter key="label2" value="if(label&gt;[SPX Close-0],TRUE,FALSE)"/>
    <parameter key="label3" value="([SPX Close-0]+[SPX Close-1]+[SPX Close-2]+[SPX Close-4]+[SPX Close-5]+[SPX Close-6]+[SPX Close-7])/7"/>
  </list>
</operator>
<!-- Makes the boolean label2 the prediction target (role "label") and sets
     label3 to the "regular" role. -->
<operator activated="true" class="set_role" compatibility="7.4.000" expanded="true" height="82" name="Set Role (2)" width="90" x="179" y="289">
  <parameter key="attribute_name" value="label2"/>
  <parameter key="target_role" value="label"/>
  <list key="set_additional_roles">
    <parameter key="label3" value="regular"/>
  </list>
</operator>
<!-- Chooses the regular attributes handed to the validation.
     Do NOT add `label` to this subset: label2 (the target) is computed as
     if(label > [SPX Close-0]), so a learner that sees `label` next to
     [SPX Close-0] can reproduce the target almost exactly. That is target
     leakage, not predictive skill.
     The attribute list must stay on ONE line. It was previously wrapped in
     the middle of "SPX Low-65" and "SPX Close-76"; a line break inside an
     attribute value is normalized to an extra space, which corrupts those
     two names.
     NOTE(review): Windowing uses window_size 10, so presumably only the
     offsets 0 to 9 exist; the 10 to 99 offsets and the SPX-n names look
     stale. Confirm before pruning. -->
<operator activated="true" class="select_attributes" compatibility="7.4.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="313" y="289">
  <parameter key="attribute_filter_type" value="subset"/>
  <parameter key="attributes" value="SPX Close-0|SPX Close-1|SPX Close-2|SPX Close-3|SPX Close-4|SPX Close-5|SPX Close-6|SPX Close-7|SPX Close-8|SPX Close-9|SPX High-0|SPX High-1|SPX High-2|SPX High-3|SPX High-4|SPX High-5|SPX High-6|SPX High-7|SPX High-8|SPX High-9|SPX Low-0|SPX Low-1|SPX Low-2|SPX Low-3|SPX Low-4|SPX Low-5|SPX Low-6|SPX Low-7|SPX Low-8|SPX Low-9|SPX Open-0|SPX Open-1|SPX Open-2|SPX Open-3|SPX Open-4|SPX Open-5|SPX Open-6|SPX Open-7|SPX Open-8|SPX Open-9|SPX-9|SPX-8|SPX-7|SPX-6|SPX-5|SPX-4|SPX-3|SPX-2|SPX-1|SPX-0|SPX Open-99|SPX Open-98|SPX Open-97|SPX Open-96|SPX Open-95|SPX Open-94|SPX Open-93|SPX Open-92|SPX Open-91|SPX Open-90|SPX Open-89|SPX Open-88|SPX Open-87|SPX Open-86|SPX Open-85|SPX Open-84|SPX Open-83|SPX Open-82|SPX Open-81|SPX Open-80|SPX Open-79|SPX Open-78|SPX Open-77|SPX Open-76|SPX Open-75|SPX Open-74|SPX Open-73|SPX Open-72|SPX Open-71|SPX Open-70|SPX Open-69|SPX Open-68|SPX Open-67|SPX Open-66|SPX Open-65|SPX Open-64|SPX Open-63|SPX Open-62|SPX Open-61|SPX Open-60|SPX Open-59|SPX Open-58|SPX Open-57|SPX Open-56|SPX Open-55|SPX Open-54|SPX Open-53|SPX Open-52|SPX Open-51|SPX Open-50|SPX Open-49|SPX Open-48|SPX Open-47|SPX Open-46|SPX Open-45|SPX Open-44|SPX Open-43|SPX Open-42|SPX Open-41|SPX Open-40|SPX Open-39|SPX Open-38|SPX Open-37|SPX Open-36|SPX Open-35|SPX Open-34|SPX Open-33|SPX Open-32|SPX Open-31|SPX Open-30|SPX Open-29|SPX Open-28|SPX Open-27|SPX Open-26|SPX Open-25|SPX Open-24|SPX Open-23|SPX Open-22|SPX Open-21|SPX Open-20|SPX Open-19|SPX Open-18|SPX Open-17|SPX Open-16|SPX Open-15|SPX Open-14|SPX Open-13|SPX Open-12|SPX Open-11|SPX Open-10|SPX Low-99|SPX Low-98|SPX Low-97|SPX Low-96|SPX Low-95|SPX Low-94|SPX Low-93|SPX Low-92|SPX Low-91|SPX Low-90|SPX Low-89|SPX Low-88|SPX Low-87|SPX Low-86|SPX Low-85|SPX Low-84|SPX Low-83|SPX Low-82|SPX Low-81|SPX Low-80|SPX Low-79|SPX Low-78|SPX Low-77|SPX Low-76|SPX Low-75|SPX Low-74|SPX Low-73|SPX Low-72|SPX Low-71|SPX Low-70|SPX Low-69|SPX Low-68|SPX Low-67|SPX Low-66|SPX Low-65|SPX Low-64|SPX Low-63|SPX Low-62|SPX Low-61|SPX Low-60|SPX Low-59|SPX Low-58|SPX Low-57|SPX Low-56|SPX Low-55|SPX Low-54|SPX Low-53|SPX Low-52|SPX Low-51|SPX Low-50|SPX Low-49|SPX Low-48|SPX Low-47|SPX Low-46|SPX Low-45|SPX Low-44|SPX Low-43|SPX Low-42|SPX Low-41|SPX Low-40|SPX Low-39|SPX Low-38|SPX Low-37|SPX Low-36|SPX Low-35|SPX Low-34|SPX Low-33|SPX Low-32|SPX Low-31|SPX Low-30|SPX Low-29|SPX Low-28|SPX Low-27|SPX Low-26|SPX Low-25|SPX Low-24|SPX Low-23|SPX Low-22|SPX Low-21|SPX Low-20|SPX Low-19|SPX Low-18|SPX Low-17|SPX Low-16|SPX Low-15|SPX Low-14|SPX Low-13|SPX Low-12|SPX Low-11|SPX Low-10|SPX High-99|SPX High-98|SPX High-97|SPX High-96|SPX High-95|SPX High-94|SPX High-93|SPX High-92|SPX High-91|SPX High-90|SPX High-89|SPX High-88|SPX High-87|SPX High-86|SPX High-85|SPX High-84|SPX High-83|SPX High-82|SPX High-81|SPX High-80|SPX High-79|SPX High-78|SPX High-77|SPX High-76|SPX High-75|SPX High-74|SPX High-73|SPX High-72|SPX High-71|SPX High-70|SPX High-69|SPX High-68|SPX High-67|SPX High-66|SPX High-65|SPX High-64|SPX High-63|SPX High-62|SPX High-61|SPX High-60|SPX High-59|SPX High-58|SPX High-57|SPX High-56|SPX High-55|SPX High-54|SPX High-53|SPX High-52|SPX High-51|SPX High-50|SPX High-49|SPX High-48|SPX High-47|SPX High-46|SPX High-45|SPX High-44|SPX High-43|SPX High-42|SPX High-41|SPX High-40|SPX High-39|SPX High-38|SPX High-37|SPX High-36|SPX High-35|SPX High-34|SPX High-33|SPX High-32|SPX High-31|SPX High-30|SPX High-29|SPX High-28|SPX High-27|SPX High-26|SPX High-25|SPX High-24|SPX High-23|SPX High-22|SPX High-21|SPX High-20|SPX High-19|SPX High-18|SPX High-17|SPX High-16|SPX High-15|SPX High-14|SPX High-13|SPX High-12|SPX High-11|SPX High-10|SPX Close-99|SPX Close-98|SPX Close-97|SPX Close-96|SPX Close-95|SPX Close-94|SPX Close-93|SPX Close-92|SPX Close-91|SPX Close-90|SPX Close-89|SPX Close-88|SPX Close-87|SPX Close-86|SPX Close-85|SPX Close-84|SPX Close-83|SPX Close-82|SPX Close-81|SPX Close-80|SPX Close-79|SPX Close-78|SPX Close-77|SPX Close-76|SPX Close-75|SPX Close-74|SPX Close-73|SPX Close-72|SPX Close-71|SPX Close-70|SPX Close-69|SPX Close-68|SPX Close-67|SPX Close-66|SPX Close-65|SPX Close-64|SPX Close-63|SPX Close-62|SPX Close-61|SPX Close-60|SPX Close-59|SPX Close-58|SPX Close-57|SPX Close-56|SPX Close-55|SPX Close-54|SPX Close-53|SPX Close-52|SPX Close-51|SPX Close-50|SPX Close-49|SPX Close-48|SPX Close-47|SPX Close-46|SPX Close-45|SPX Close-44|SPX Close-43|SPX Close-42|SPX Close-41|SPX Close-40|SPX Close-39|SPX Close-38|SPX Close-37|SPX Close-36|SPX Close-35|SPX Close-34|SPX Close-33|SPX Close-32|SPX Close-31|SPX Close-30|SPX Close-29|SPX Close-28|SPX Close-27|SPX Close-26|SPX Close-25|SPX Close-24|SPX Close-23|SPX Close-22|SPX Close-21|SPX Close-20|SPX Close-19|SPX Close-18|SPX Close-17|SPX Close-16|SPX Close-15|SPX Close-14|SPX Close-13|SPX Close-12|SPX Close-11|SPX Close-10"/>
</operator>
<!-- Missing-value replacement with no per-column overrides (the columns list
     is empty), so only the operator's default settings apply. -->
<operator activated="true" class="replace_missing_values" compatibility="7.4.000" expanded="true" height="103" name="Replace Missing Values" width="90" x="313" y="442">
  <list key="columns"/>
</operator>
<!-- Split validation with linear sampling. The first subprocess trains a
     model, the second applies it to the held-out part and measures binominal
     classification performance.
     NOTE(review): training_set_size is set to 1000 but no `split` parameter
     is written here; confirm that the size-based split is actually in effect
     rather than a ratio-based default. -->
<operator activated="true" class="split_validation" compatibility="7.4.000" expanded="true" height="166" name="Validation (2)" width="90" x="313" y="595">
  <parameter key="training_set_size" value="1000"/>
  <parameter key="sampling_type" value="linear sampling"/>
  <!-- Training subprocess. AdaBoost is the only activated operator at this
       level and the only one wired to the ports; every other learner here is
       a deactivated alternative. -->
  <process expanded="true">
    <operator activated="false" class="k_nn" compatibility="7.4.000" expanded="true" height="82" name="k-NN" width="90" x="112" y="187"/>
    <operator activated="false" class="h2o:gradient_boosted_trees" compatibility="7.4.000" expanded="true" height="103" name="Gradient Boosted Trees (2)" width="90" x="112" y="34">
      <parameter key="maximal_depth" value="10"/>
      <list key="expert_parameters"/>
    </operator>
    <operator activated="false" class="concurrency:parallel_decision_tree" compatibility="7.4.000" expanded="true" height="82" name="Decision Tree (2)" width="90" x="112" y="289"/>
    <operator activated="false" class="bayesian_boosting" compatibility="7.4.000" expanded="true" height="82" name="Bayesian Boosting" width="90" x="112" y="493">
      <process expanded="true">
        <operator activated="true" class="h2o:gradient_boosted_trees" compatibility="7.4.000" expanded="true" height="103" name="Gradient Boosted Trees (3)" width="90" x="179" y="34">
          <parameter key="number_of_trees" value="200"/>
          <list key="expert_parameters"/>
        </operator>
        <connect from_port="training set" to_op="Gradient Boosted Trees (3)" to_port="training set"/>
        <connect from_op="Gradient Boosted Trees (3)" from_port="model" to_port="model"/>
        <portSpacing port="source_training set" spacing="0"/>
        <portSpacing port="sink_model" spacing="0"/>
      </process>
    </operator>
    <operator activated="false" class="weka:W-IBk" compatibility="7.3.000" expanded="true" height="82" name="W-IBk" width="90" x="246" y="34"/>
    <operator activated="false" class="weka:W-KStar" compatibility="7.3.000" expanded="true" height="82" name="W-KStar" width="90" x="246" y="544"/>
    <operator activated="false" class="weka:W-LWL" compatibility="7.3.000" expanded="true" height="82" name="W-LWL" width="90" x="246" y="238"/>
    <operator activated="false" class="weka:W-LADTree" compatibility="7.3.000" expanded="true" height="82" name="W-LADTree" width="90" x="246" y="340"/>
    <operator activated="false" class="weka:W-FT" compatibility="7.3.000" expanded="true" height="82" name="W-FT" width="90" x="246" y="442"/>
    <operator activated="false" class="weka:W-NBTree" compatibility="7.3.000" expanded="true" height="82" name="W-NBTree" width="90" x="246" y="136"/>
    <operator activated="false" class="support_vector_machine_evolutionary" compatibility="7.4.000" expanded="true" height="82" name="SVM" width="90" x="380" y="187"/>
    <operator activated="false" class="bagging" compatibility="7.4.000" expanded="true" height="82" name="Bagging (2)" width="90" x="112" y="595">
      <process expanded="true">
        <operator activated="true" class="h2o:gradient_boosted_trees" compatibility="7.4.000" expanded="true" height="103" name="Gradient Boosted Trees (4)" width="90" x="179" y="34">
          <list key="expert_parameters"/>
        </operator>
        <connect from_port="training set" to_op="Gradient Boosted Trees (4)" to_port="training set"/>
        <connect from_op="Gradient Boosted Trees (4)" from_port="model" to_port="model"/>
        <portSpacing port="source_training set" spacing="0"/>
        <portSpacing port="sink_model" spacing="0"/>
      </process>
    </operator>
    <!-- Active learner: AdaBoost wrapped around Gradient Boosted Trees (5);
         the Decision Tree (3) alternative inside it is deactivated. -->
    <operator activated="true" class="adaboost" compatibility="7.4.000" expanded="true" height="82" name="AdaBoost" width="90" x="112" y="391">
      <process expanded="true">
        <operator activated="false" class="concurrency:parallel_decision_tree" compatibility="7.4.000" expanded="true" height="82" name="Decision Tree (3)" width="90" x="313" y="85"/>
        <operator activated="true" class="h2o:gradient_boosted_trees" compatibility="7.4.000" expanded="true" height="103" name="Gradient Boosted Trees (5)" width="90" x="313" y="238">
          <list key="expert_parameters"/>
        </operator>
        <connect from_port="training set" to_op="Gradient Boosted Trees (5)" to_port="training set"/>
        <connect from_op="Gradient Boosted Trees (5)" from_port="model" to_port="model"/>
        <portSpacing port="source_training set" spacing="0"/>
        <portSpacing port="sink_model" spacing="0"/>
      </process>
    </operator>
    <!-- Training data goes to AdaBoost; its model leaves through "model" and
         its example set is passed on through "through 1". -->
    <connect from_port="training" to_op="AdaBoost" to_port="training set"/>
    <connect from_op="AdaBoost" from_port="model" to_port="model"/>
    <connect from_op="AdaBoost" from_port="example set" to_port="through 1"/>
    <portSpacing port="source_training" spacing="0"/>
    <portSpacing port="sink_model" spacing="0"/>
    <portSpacing port="sink_through 1" spacing="0"/>
    <portSpacing port="sink_through 2" spacing="0"/>
  </process>
  <!-- Testing subprocess: apply the trained model to the test set, then score
       it. Only "averagable 1" receives a performance vector. -->
  <process expanded="true">
    <operator activated="true" class="apply_model" compatibility="7.4.000" expanded="true" height="82" name="Apply Model (2)" width="90" x="112" y="34">
      <list key="application_parameters"/>
    </operator>
    <operator activated="true" class="performance_binominal_classification" compatibility="7.4.000" expanded="true" height="82" name="Performance (3)" width="90" x="246" y="34">
      <parameter key="classification_error" value="true"/>
      <parameter key="kappa" value="true"/>
      <parameter key="AUC (optimistic)" value="true"/>
      <parameter key="AUC" value="true"/>
      <parameter key="AUC (pessimistic)" value="true"/>
      <parameter key="precision" value="true"/>
      <parameter key="recall" value="true"/>
      <parameter key="false_positive" value="true"/>
      <parameter key="false_negative" value="true"/>
      <parameter key="true_positive" value="true"/>
      <parameter key="true_negative" value="true"/>
      <parameter key="sensitivity" value="true"/>
      <parameter key="positive_predictive_value" value="true"/>
      <parameter key="negative_predictive_value" value="true"/>
      <parameter key="psep" value="true"/>
    </operator>
    <connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
    <connect from_op="Apply Model (2)" from_port="labelled data" to_op="Performance (3)" to_port="labelled data"/>
    <connect from_op="Performance (3)" from_port="performance" to_port="averagable 1"/>
    <portSpacing port="source_model" spacing="0"/>
    <portSpacing port="source_test set" spacing="0"/>
    <portSpacing port="source_through 1" spacing="0"/>
    <portSpacing port="source_through 2" spacing="0"/>
    <portSpacing port="sink_averagable 1" spacing="0"/>
    <portSpacing port="sink_averagable 2" spacing="0"/>
    <portSpacing port="sink_averagable 3" spacing="0"/>
    <portSpacing port="sink_averagable 4" spacing="0"/>
  </process>
</operator>
<!-- Main data flow, in execution order: Retrieve, Differencing,
     Select Attributes, Set Role, Windowing, Generate Attributes,
     Set Role (2), Select Attributes (2), Replace Missing Values,
     Validation (2). -->
<connect from_op="Retrieve Date_NDX_SPX_VIX_RUT_DJX_HOLC Data (2)" from_port="output" to_op="Differencing" to_port="example set input"/>
<connect from_op="Differencing" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Windowing" to_port="example set input"/>
<connect from_op="Windowing" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
<connect from_op="Set Role (2)" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
<connect from_op="Select Attributes (2)" from_port="example set output" to_op="Replace Missing Values" to_port="example set input"/>
<connect from_op="Replace Missing Values" from_port="example set output" to_op="Validation (2)" to_port="training"/>
<!-- Process results: model, training data, then the validation's
     averagable outputs.
     NOTE(review): the testing subprocess of Validation (2) only connects
     "averagable 1"; "averagable 2" and "averagable 3" are forwarded here but
     presumably deliver nothing. Confirm, then drop the unused connections. -->
<connect from_op="Validation (2)" from_port="model" to_port="result 1"/>
<connect from_op="Validation (2)" from_port="training" to_port="result 2"/>
<connect from_op="Validation (2)" from_port="averagable 1" to_port="result 3"/>
<connect from_op="Validation (2)" from_port="averagable 2" to_port="result 4"/>
<connect from_op="Validation (2)" from_port="averagable 3" to_port="result 5"/>
<!-- Canvas layout hints for the process ports. -->
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
<portSpacing port="sink_result 5" spacing="0"/>
<portSpacing port="sink_result 6" spacing="0"/>
</process>
</operator>
</process>

Find more posts tagged with

Sort by:
1 - 1 of 11