Dimensionality Reduction
Flixport
New Altair Community Member
Hey Guys,
We have applied the PCA and Chi-Squared methods for dimensionality reduction. This reduces the data volume considerably, which leads to my question: can I remove the Principal Component Analysis (PCA) operator and keep only the Weight by PCA operator?
Thank you in advance for answering my question.
<!-- Root element of the RapidMiner process. The opening tag was missing from the pasted
     XML, which left the closing </process> at the end of the listing unmatched. The
     version matches the compatibility attribute used by every operator below. -->
<process version="9.2.001">
<!-- Process context: no input or output locations and no macros are defined. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<operator activated="true" class="process" compatibility="9.2.001" expanded="true" name="Process">
<!-- Settings of the root "Process" operator: log verbosity, the process random seed (2001),
     mail notification (send_mail is "never", so the e-mail fields are left empty/default),
     and the encoding (UTF-8). -->
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="UTF-8"/>
<process expanded="true">
<!-- Load the example set stored under the repository entry "reut2". -->
<operator activated="true" class="retrieve" compatibility="9.2.001"
          expanded="true" height="68" name="Retrieve reut2"
          width="90" x="45" y="187">
  <parameter key="repository_entry" value="reut2"/>
</operator>
<!-- Subset filter with invert_selection="true": the attributes named in "attributes"
     (exchanges, orgs, people, places, text_orig, topics, zahlen) are the ones removed,
     everything else is passed on. include_special_attributes="true" extends the filter
     to attributes with a special role. -->
<operator activated="true" class="select_attributes" compatibility="9.2.001"
          expanded="true" height="82" name="Select Attributes"
          width="90" x="179" y="187">
  <parameter key="attribute_filter_type" value="subset"/>
  <parameter key="attribute" value=""/>
  <parameter key="attributes" value="exchanges|orgs|people|places|text_orig|topics|zahlen"/>
  <parameter key="use_except_expression" value="false"/>
  <parameter key="value_type" value="attribute_value"/>
  <parameter key="use_value_type_exception" value="false"/>
  <parameter key="except_value_type" value="time"/>
  <parameter key="block_type" value="attribute_block"/>
  <parameter key="use_block_type_exception" value="false"/>
  <parameter key="except_block_type" value="value_matrix_row_start"/>
  <parameter key="invert_selection" value="true"/>
  <parameter key="include_special_attributes" value="true"/>
</operator>
<!-- Add an id attribute to every example; ids are not nominal (create_nominal_ids="false")
     and the offset is 0. -->
<operator activated="true" class="generate_id" compatibility="9.2.001"
          expanded="true" height="82" name="Generate ID"
          width="90" x="313" y="187">
  <parameter key="create_nominal_ids" value="false"/>
  <parameter key="offset" value="0"/>
</operator>
<!-- Custom filter: keep only examples whose "category" value does not equal "?".
     NOTE(review): if "?" is meant to stand for missing values rather than a literal
     string in the data, an is_not_missing condition may be what is intended; confirm
     against the reut2 data. -->
<operator activated="true" class="filter_examples" compatibility="9.2.001"
          expanded="true" height="103" name="Filter Examples"
          width="90" x="45" y="289">
  <parameter key="parameter_expression" value=""/>
  <parameter key="condition_class" value="custom_filters"/>
  <parameter key="invert_filter" value="false"/>
  <list key="filters_list">
    <parameter key="filters_entry_key" value="category.does_not_equal.?"/>
  </list>
  <parameter key="filters_logic_and" value="true"/>
  <parameter key="filters_check_metadata" value="true"/>
</operator>
<!-- Mark the "category" attribute as the label; no additional roles are assigned. -->
<operator activated="true" class="set_role" compatibility="9.2.001"
          expanded="true" height="82" name="Set Role"
          width="90" x="179" y="289">
  <parameter key="attribute_name" value="category"/>
  <parameter key="target_role" value="label"/>
  <list key="set_additional_roles"/>
</operator>
<!-- Remove attributes whose absolute correlation with another attribute is greater
     than 0.8. Attributes are visited in random order; use_local_random_seed="false",
     so local_random_seed is presumably ignored here (confirm). -->
<operator activated="true" class="remove_correlated_attributes" compatibility="9.2.001"
          expanded="true" height="82" name="Remove Correlated Attributes"
          width="90" x="313" y="289">
  <parameter key="correlation" value="0.8"/>
  <parameter key="filter_relation" value="greater"/>
  <parameter key="attribute_order" value="random"/>
  <parameter key="use_absolute_correlation" value="true"/>
  <parameter key="use_local_random_seed" value="false"/>
  <parameter key="local_random_seed" value="1992"/>
</operator>
<!-- Subprocess: copies the incoming example set three ways and runs three independent
     reduction branches, each ending in a Store operator:
       output 1: Weight by Chi Squared Statistic, Select by Weights (ChiSq), Store      (out 1)
       output 2: PCA, Store (2)                                                         (out 2)
       output 3: Weight by PCA, Select by Weights (PCA), Store (3)                      (out 3)
     Weight by PCA is fed directly from Multiply, not from the PCA operator, so the
     two PCA-related branches do not depend on each other. -->
<operator activated="true" class="subprocess" compatibility="9.2.001"
          expanded="true" height="124" name="Subprocess"
          width="90" x="581" y="187">
  <process expanded="true">
    <!-- Fan the input out to the three branches. -->
    <operator activated="true" class="multiply" compatibility="9.2.001"
              expanded="true" height="124" name="Multiply (2)"
              width="90" x="380" y="442"/>

    <!-- Branch 1: chi-squared attribute weights, keep the top 45 attributes, store. -->
    <operator activated="true" class="weight_by_chi_squared_statistic" compatibility="9.2.001"
              expanded="true" height="82" name="Weight by Chi Squared Statistic"
              width="90" x="648" y="289">
      <parameter key="normalize_weights" value="false"/>
      <parameter key="sort_weights" value="true"/>
      <parameter key="sort_direction" value="descending"/>
      <parameter key="number_of_bins" value="10"/>
    </operator>
    <!-- weight_relation is "top k" with k=45; "weight" and "p" are presumably unused
         for this relation (confirm). -->
    <operator activated="true" class="select_by_weights" compatibility="9.2.001"
              expanded="true" height="103" name="Select by Weights (ChiSq)"
              width="90" x="849" y="289">
      <parameter key="weight_relation" value="top k"/>
      <parameter key="weight" value="10.0"/>
      <parameter key="k" value="45"/>
      <parameter key="p" value="0.1"/>
      <parameter key="deselect_unknown" value="true"/>
      <parameter key="use_absolute_weights" value="false"/>
    </operator>
    <operator activated="true" class="store" compatibility="9.2.001"
              expanded="true" height="68" name="Store"
              width="90" x="1050" y="289">
      <parameter key="repository_entry" value="data/data_out_select_by_chisq_weights"/>
    </operator>

    <!-- Branch 2: PCA transformation keeping a cumulative variance of 0.8
         (dimensionality_reduction is "keep variance", so number_of_components is
         presumably not used; confirm). -->
    <operator activated="true" class="principal_component_analysis" compatibility="9.2.001"
              expanded="true" height="103" name="PCA"
              width="90" x="648" y="442">
      <parameter key="dimensionality_reduction" value="keep variance"/>
      <parameter key="variance_threshold" value="0.8"/>
      <parameter key="number_of_components" value="1"/>
    </operator>

    <!-- Branch 3: attribute weights taken from component number 1, keep the top 45
         attributes by absolute weight, store. -->
    <operator activated="true" class="weight_by_pca" compatibility="9.2.001"
              expanded="true" height="82" name="Weight by PCA"
              width="90" x="648" y="595">
      <parameter key="normalize_weights" value="false"/>
      <parameter key="sort_weights" value="true"/>
      <parameter key="sort_direction" value="ascending"/>
      <parameter key="component_number" value="1"/>
    </operator>
    <operator activated="true" class="select_by_weights" compatibility="9.2.001"
              expanded="true" height="103" name="Select by Weights (PCA)"
              width="90" x="849" y="595">
      <parameter key="weight_relation" value="top k"/>
      <parameter key="weight" value="10.0"/>
      <parameter key="k" value="45"/>
      <parameter key="p" value="0.1"/>
      <parameter key="deselect_unknown" value="true"/>
      <parameter key="use_absolute_weights" value="true"/>
    </operator>
    <operator activated="true" class="store" compatibility="9.2.001"
              expanded="true" height="68" name="Store (3)"
              width="90" x="1050" y="595">
      <parameter key="repository_entry" value="data/data_out_select_by_pca_weights"/>
    </operator>

    <!-- End of branch 2: store the PCA-transformed example set. -->
    <operator activated="true" class="store" compatibility="9.2.001"
              expanded="true" height="68" name="Store (2)"
              width="90" x="1050" y="442">
      <parameter key="repository_entry" value="data/data_out_pca"/>
    </operator>

    <!-- Wiring inside the subprocess. -->
    <connect from_port="in 1" to_op="Multiply (2)" to_port="input"/>
    <connect from_op="Multiply (2)" from_port="output 1" to_op="Weight by Chi Squared Statistic" to_port="example set"/>
    <connect from_op="Multiply (2)" from_port="output 2" to_op="PCA" to_port="example set input"/>
    <connect from_op="Multiply (2)" from_port="output 3" to_op="Weight by PCA" to_port="example set"/>
    <connect from_op="Weight by Chi Squared Statistic" from_port="weights" to_op="Select by Weights (ChiSq)" to_port="weights"/>
    <connect from_op="Weight by Chi Squared Statistic" from_port="example set" to_op="Select by Weights (ChiSq)" to_port="example set input"/>
    <connect from_op="Select by Weights (ChiSq)" from_port="example set output" to_op="Store" to_port="input"/>
    <connect from_op="Store" from_port="through" to_port="out 1"/>
    <connect from_op="PCA" from_port="example set output" to_op="Store (2)" to_port="input"/>
    <connect from_op="Weight by PCA" from_port="weights" to_op="Select by Weights (PCA)" to_port="weights"/>
    <connect from_op="Weight by PCA" from_port="example set" to_op="Select by Weights (PCA)" to_port="example set input"/>
    <connect from_op="Select by Weights (PCA)" from_port="example set output" to_op="Store (3)" to_port="input"/>
    <connect from_op="Store (3)" from_port="through" to_port="out 3"/>
    <connect from_op="Store (2)" from_port="through" to_port="out 2"/>
    <portSpacing port="source_in 1" spacing="0"/>
    <portSpacing port="source_in 2" spacing="0"/>
    <portSpacing port="sink_out 1" spacing="0"/>
    <portSpacing port="sink_out 2" spacing="0"/>
    <portSpacing port="sink_out 3" spacing="0"/>
    <portSpacing port="sink_out 4" spacing="0"/>
  </process>
  <description align="center" color="transparent" colored="false" width="126">Subprozess</description>
</operator>
<!-- Top-level wiring: a linear preprocessing chain
     (Retrieve reut2, Select Attributes, Generate ID, Filter Examples, Set Role,
     Remove Correlated Attributes) feeds the Subprocess, whose three outputs are
     delivered as process results 1 to 3. -->
<connect from_op="Retrieve reut2" from_port="output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
<connect from_op="Generate ID" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Remove Correlated Attributes" to_port="example set input"/>
<connect from_op="Remove Correlated Attributes" from_port="example set output" to_op="Subprocess" to_port="in 1"/>
<connect from_op="Subprocess" from_port="out 1" to_port="result 1"/>
<connect from_op="Subprocess" from_port="out 2" to_port="result 2"/>
<connect from_op="Subprocess" from_port="out 3" to_port="result 3"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
<portSpacing port="sink_result 4" spacing="0"/>
</process>
</operator>
</process>
0
Best Answer
-
If you are not using the PCA output, you can remove the PCA operator. If you just need the weights from PCA, you can keep the current Weight by PCA operator and remove the PCA operator. You can tailor the process to your needs.
5
Answers
-
If you are not using the PCA output, you can remove the PCA operator. If you just need the weights from PCA, you can keep the current Weight by PCA operator and remove the PCA operator. You can tailor the process to your needs.
5