I've checked the input example set and everything looks fine. Here's the error:
It only seems to happen with the logistic regression algorithm. The process XML is below:
<?xml version="1.0" encoding="UTF-8"?><process version="9.9.000">
<!-- Process context: no inputs, outputs, or macros are declared. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<operator activated="true" class="process" compatibility="9.9.000" expanded="true" name="Process">
<!-- Root process settings: global random seed 2001, mail notification disabled, system default encoding. -->
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<!-- Read train (2): reads the training data from a comma-separated file whose first row holds the attribute names. -->
<operator activated="true" class="read_csv" compatibility="9.9.000" expanded="true" height="68" name="Read train (2)" width="90" x="45" y="34">
  <!-- NOTE(review): this value names a folder, not a file; presumably the file name was stripped before sharing. Confirm and point it at the actual CSV. -->
  <parameter key="csv_file" value="C:/Users/ASUS/Documents"/>
  <parameter key="column_separators" value=","/>
  <parameter key="trim_lines" value="false"/>
  <parameter key="use_quotes" value="true"/>
  <!-- The double-quote character has to be written as an entity; a bare quote here ends the attribute value early and makes the document malformed. -->
  <parameter key="quotes_character" value="&quot;"/>
  <parameter key="escape_character" value="\"/>
  <parameter key="skip_comments" value="false"/>
  <parameter key="comment_characters" value="#"/>
  <parameter key="starting_row" value="1"/>
  <parameter key="parse_numbers" value="true"/>
  <parameter key="decimal_character" value="."/>
  <parameter key="grouped_digits" value="false"/>
  <parameter key="grouping_character" value=","/>
  <parameter key="infinity_representation" value=""/>
  <parameter key="date_format" value=""/>
  <parameter key="first_row_as_names" value="true"/>
  <list key="annotations"/>
  <parameter key="time_zone" value="SYSTEM"/>
  <parameter key="locale" value="English (United States)"/>
  <parameter key="encoding" value="SYSTEM"/>
  <parameter key="read_all_values_as_polynominal" value="false"/>
  <list key="data_set_meta_data_information"/>
  <!-- Values that do not match the guessed column type are read as missing values. -->
  <parameter key="read_not_matching_values_as_missings" value="true"/>
</operator>
<!-- Remove missing data: keeps only examples matching the "no_missing_attributes" condition. -->
<operator name="Remove missing data" class="filter_examples" activated="true"
          compatibility="9.9.000" expanded="true"
          height="103" width="90" x="179" y="34">
  <parameter key="parameter_expression" value=""/>
  <parameter key="condition_class" value="no_missing_attributes"/>
  <parameter key="invert_filter" value="false"/>
  <list key="filters_list"/>
  <parameter key="filters_logic_and" value="true"/>
  <parameter key="filters_check_metadata" value="true"/>
</operator>
<!-- Set Role (2): marks "ID" as the id attribute and "Target" as the label. -->
<operator name="Set Role (2)" class="set_role" activated="true"
          compatibility="9.9.000" expanded="true"
          height="82" width="90" x="313" y="34">
  <parameter key="attribute_name" value="ID"/>
  <parameter key="target_role" value="id"/>
  <!-- "ID" is listed again here with the same role as in the primary parameters above. -->
  <list key="set_additional_roles">
    <parameter key="ID" value="id"/>
    <parameter key="Target" value="label"/>
  </list>
</operator>
<!-- Multiply (2): duplicates the prepared data so that both optimization branches receive a copy. -->
<operator name="Multiply (2)" class="multiply" activated="true"
          compatibility="9.9.000" expanded="true"
          height="103" width="90" x="447" y="34"/>
<!-- Sample (3): absolute, class-balanced sampling with 51 examples requested for each of the classes "False" and "True". -->
<operator name="Sample (3)" class="sample" activated="true"
          compatibility="9.9.000" expanded="true"
          height="82" width="90" x="581" y="34">
  <parameter key="sample" value="absolute"/>
  <parameter key="balance_data" value="true"/>
  <!-- NOTE(review): presumably the per-class sizes below take precedence over sample_size while balance_data is true; confirm. -->
  <parameter key="sample_size" value="100"/>
  <parameter key="sample_ratio" value="0.1"/>
  <parameter key="sample_probability" value="0.1"/>
  <list key="sample_size_per_class">
    <parameter key="False" value="51"/>
    <parameter key="True" value="51"/>
  </list>
  <list key="sample_ratio_per_class"/>
  <list key="sample_probability_per_class"/>
  <parameter key="use_local_random_seed" value="false"/>
  <parameter key="local_random_seed" value="1992"/>
</operator>
<operator activated="true" class="concurrency:optimize_parameters_grid" compatibility="9.9.000" expanded="true" height="145" name="with Downsampling" width="90" x="715" y="34">
<!-- Grid definition for the downsampled branch: varies the correlation threshold of
     "Remove Correlated Attributes" and k of "MRMR-FS".
     NOTE(review): the bracket syntax presumably reads [min;max;steps;scale]; confirm. -->
<list key="parameters">
  <parameter key="Remove Correlated Attributes.correlation" value="[0.4;1.0;6;linear]"/>
  <parameter key="MRMR-FS.k" value="[10;24;7;linear]"/>
</list>
<!-- Execution settings: abort on the first error, log the main criterion only, run in parallel. -->
<parameter key="error_handling" value="fail on error"/>
<parameter key="log_performance" value="true"/>
<parameter key="log_all_criteria" value="false"/>
<parameter key="synchronize" value="false"/>
<parameter key="enable_parallel_execution" value="true"/>
<process expanded="true">
<operator activated="true" class="concurrency:cross_validation" compatibility="9.9.000" expanded="true" height="145" name="Cross Validation (2)" width="90" x="45" y="34">
<!-- 4-fold cross validation with automatic sampling; the local seed is not used (use_local_random_seed is false); folds may run in parallel. -->
<parameter key="split_on_batch_attribute" value="false"/>
<parameter key="leave_one_out" value="false"/>
<parameter key="number_of_folds" value="4"/>
<parameter key="sampling_type" value="automatic"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
<parameter key="enable_parallel_execution" value="true"/>
<process expanded="true">
<operator activated="true" class="subprocess" compatibility="9.9.000" expanded="true" height="82" name="Remove outliers (2)" width="90" x="45" y="34">
<process expanded="true">
<!-- Multiply (3): duplicates the training fold; one copy stays untouched, the other is used for outlier scoring. -->
<operator name="Multiply (3)" class="multiply" activated="true"
          compatibility="9.9.000" expanded="true"
          height="103" width="90" x="45" y="34"/>
<!-- Normalize (2): Z-transformation over all regular attributes (special attributes excluded). -->
<operator name="Normalize (2)" class="normalize" activated="true"
          compatibility="9.9.000" expanded="true"
          height="103" width="90" x="112" y="187">
  <parameter key="return_preprocessing_model" value="false"/>
  <parameter key="create_view" value="false"/>
  <!-- Attribute selection: everything, no exceptions, no inversion. -->
  <parameter key="attribute_filter_type" value="all"/>
  <parameter key="attribute" value=""/>
  <parameter key="attributes" value=""/>
  <parameter key="use_except_expression" value="false"/>
  <parameter key="value_type" value="numeric"/>
  <parameter key="use_value_type_exception" value="false"/>
  <parameter key="except_value_type" value="real"/>
  <parameter key="block_type" value="value_series"/>
  <parameter key="use_block_type_exception" value="false"/>
  <parameter key="except_block_type" value="value_series_end"/>
  <parameter key="invert_selection" value="false"/>
  <parameter key="include_special_attributes" value="false"/>
  <!-- Normalization method and the settings for the other methods. -->
  <parameter key="method" value="Z-transformation"/>
  <parameter key="min" value="0.0"/>
  <parameter key="max" value="1.0"/>
  <parameter key="allow_negative_values" value="false"/>
</operator>
<!-- Detect Outlier (LOF): local outlier factor with MinPts bounds 10 to 20 and Euclidean distance. -->
<operator name="Detect Outlier (LOF)" class="detect_outlier_lof" activated="true"
          compatibility="9.9.000" expanded="true"
          height="82" width="90" x="246" y="187">
  <parameter key="minimal_points_lower_bound" value="10"/>
  <parameter key="minimal_points_upper_bound" value="20"/>
  <parameter key="distance_function" value="euclidian distance"/>
</operator>
<!-- Execute Python (3): rm_main(ori, norm) collects the IDs of rows in "norm" whose "outlier" value is below 2
     and returns the matching rows of "ori" (looked up through the ID column).
     The script is stored in one attribute value, so its line breaks are written as character references
     and the quote and less-than characters as entities; written raw they break the XML, and with the
     line breaks lost everything after the first "#" becomes a Python comment. -->
<operator activated="true" class="python_scripting:execute_python" compatibility="9.8.000" expanded="true" height="124" name="Execute Python (3)" width="90" x="380" y="34">
  <parameter key="script" value="import pandas&#10;&#10;# rm_main is a mandatory function,&#10;# the number of arguments has to be the number of input ports (can be none),&#10;# or the number of input ports plus one if &quot;use macros&quot; parameter is set&#10;# if you want to use macros, use this instead and check &quot;use macros&quot; parameter:&#10;#def rm_main(data,macros):&#10;def rm_main(ori, norm):&#10;    ids = list(norm.loc[norm['outlier']&lt;2, 'ID'])&#10;    data = ori.set_index('ID', drop = False).loc[ids,:]&#10;    return data"/>
  <parameter key="notebook_cell_tag_filter" value=""/>
  <parameter key="use_default_python" value="true"/>
  <parameter key="package_manager" value="conda (anaconda)"/>
  <parameter key="use_macros" value="false"/>
</operator>
<!-- Set Role (4): sets "ID" to id and "Target" to label on the data returned by the Python script. -->
<operator name="Set Role (4)" class="set_role" activated="true"
          compatibility="9.9.000" expanded="true"
          height="82" width="90" x="581" y="34">
  <parameter key="attribute_name" value="ID"/>
  <parameter key="target_role" value="id"/>
  <list key="set_additional_roles">
    <parameter key="Target" value="label"/>
    <parameter key="ID" value="id"/>
  </list>
</operator>
<!-- Wiring: the input is duplicated. Copy 1 goes straight to the Python script (input 1); copy 2 is
     normalized and scored by LOF before reaching the script (input 2). The script output gets its
     roles set and leaves through "out 1". -->
<connect from_port="in 1" to_op="Multiply (3)" to_port="input"/>
<connect from_op="Multiply (3)" from_port="output 1" to_op="Execute Python (3)" to_port="input 1"/>
<connect from_op="Multiply (3)" from_port="output 2" to_op="Normalize (2)" to_port="example set input"/>
<connect from_op="Normalize (2)" from_port="example set output" to_op="Detect Outlier (LOF)" to_port="example set input"/>
<connect from_op="Detect Outlier (LOF)" from_port="example set output" to_op="Execute Python (3)" to_port="input 2"/>
<connect from_op="Execute Python (3)" from_port="output 1" to_op="Set Role (4)" to_port="example set input"/>
<connect from_op="Set Role (4)" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<!-- Remove Correlated Attributes: filters on absolute correlation "greater" than the threshold, visiting attributes
     in random order. The "correlation" value is also listed in the enclosing grid search's parameter list. -->
<operator name="Remove Correlated Attributes" class="remove_correlated_attributes" activated="true"
          compatibility="9.9.000" expanded="true"
          height="82" width="90" x="179" y="34">
  <parameter key="correlation" value="1.0"/>
  <parameter key="filter_relation" value="greater"/>
  <parameter key="attribute_order" value="random"/>
  <parameter key="use_absolute_correlation" value="true"/>
  <parameter key="use_local_random_seed" value="false"/>
  <parameter key="local_random_seed" value="1992"/>
</operator>
<!-- MRMR-FS: mRMR feature selection (quotient relation, no ensemble). The "k" value is also listed in the
     enclosing grid search's parameter list. -->
<operator name="MRMR-FS" class="featselext:mrmr_feature_selection" activated="true"
          compatibility="1.1.004" expanded="true"
          height="82" width="90" x="313" y="34">
  <parameter key="normalize_weights" value="false"/>
  <parameter key="sort_weights" value="false"/>
  <parameter key="sort_direction" value="ascending"/>
  <parameter key="sets_or_ranks" value="sets"/>
  <parameter key="calculate full ranking" value="true"/>
  <parameter key="k" value="24"/>
  <parameter key="relevance_redundancy_relation" value="quotient"/>
  <parameter key="use_ensemble_method" value="none"/>
  <parameter key="ensemble_size" value="10"/>
  <parameter key="logging" value="false"/>
</operator>
<!-- Logistic Regression (H2O): no regularization, p-values computed, collinear columns removed,
     inputs standardized, missing values mean-imputed. -->
<operator name="Logistic Regression" class="h2o:logistic_regression" activated="true"
          compatibility="9.9.000" expanded="true"
          height="124" width="90" x="447" y="34">
  <parameter key="solver" value="AUTO"/>
  <parameter key="reproducible" value="false"/>
  <parameter key="maximum_number_of_threads" value="4"/>
  <!-- Regularization and lambda search are switched off. -->
  <parameter key="use_regularization" value="false"/>
  <parameter key="lambda_search" value="false"/>
  <parameter key="number_of_lambdas" value="0"/>
  <parameter key="lambda_min_ratio" value="0.0"/>
  <!-- Early stopping after 3 rounds within a tolerance of 0.001. -->
  <parameter key="early_stopping" value="true"/>
  <parameter key="stopping_rounds" value="3"/>
  <parameter key="stopping_tolerance" value="0.001"/>
  <parameter key="standardize" value="true"/>
  <parameter key="non-negative_coefficients" value="false"/>
  <parameter key="add_intercept" value="true"/>
  <parameter key="compute_p-values" value="true"/>
  <parameter key="remove_collinear_columns" value="true"/>
  <parameter key="missing_values_handling" value="MeanImputation"/>
  <parameter key="max_iterations" value="0"/>
  <parameter key="max_runtime_seconds" value="0"/>
</operator>
<!-- Training wiring: training set, outlier removal, correlation filter, mRMR selection, logistic regression, model out. -->
<connect from_port="training set" to_op="Remove outliers (2)" to_port="in 1"/>
<connect from_op="Remove outliers (2)" from_port="out 1" to_op="Remove Correlated Attributes" to_port="example set input"/>
<connect from_op="Remove Correlated Attributes" from_port="example set output" to_op="MRMR-FS" to_port="example set"/>
<connect from_op="MRMR-FS" from_port="example set" to_op="Logistic Regression" to_port="training set"/>
<connect from_op="Logistic Regression" from_port="model" to_port="model"/>
<portSpacing port="source_training set" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true">
<!-- Apply Model (2): applies the fold's model to the test fold. -->
<operator name="Apply Model (2)" class="apply_model" activated="true"
          compatibility="9.9.000" expanded="true"
          height="82" width="90" x="45" y="34">
  <list key="application_parameters"/>
  <parameter key="create_view" value="false"/>
</operator>
<!-- CV-D: binominal performance with positive class "True"; main criterion recall;
     kappa, AUC, precision and recall are enabled, every other criterion is off. -->
<operator name="CV-D" class="performance_binominal_classification" activated="true"
          compatibility="9.9.000" expanded="true"
          height="82" width="90" x="179" y="34">
  <parameter key="manually_set_positive_class" value="true"/>
  <parameter key="positive_class" value="True"/>
  <parameter key="main_criterion" value="recall"/>
  <!-- Criterion switches. -->
  <parameter key="accuracy" value="false"/>
  <parameter key="classification_error" value="false"/>
  <parameter key="kappa" value="true"/>
  <parameter key="AUC (optimistic)" value="false"/>
  <parameter key="AUC" value="true"/>
  <parameter key="AUC (pessimistic)" value="false"/>
  <parameter key="precision" value="true"/>
  <parameter key="recall" value="true"/>
  <parameter key="lift" value="false"/>
  <parameter key="fallout" value="false"/>
  <parameter key="f_measure" value="false"/>
  <parameter key="false_positive" value="false"/>
  <parameter key="false_negative" value="false"/>
  <parameter key="true_positive" value="false"/>
  <parameter key="true_negative" value="false"/>
  <parameter key="sensitivity" value="false"/>
  <parameter key="specificity" value="false"/>
  <parameter key="youden" value="false"/>
  <parameter key="positive_predictive_value" value="false"/>
  <parameter key="negative_predictive_value" value="false"/>
  <parameter key="psep" value="false"/>
  <!-- Handling of unlabeled examples and example weights. -->
  <parameter key="skip_undefined_labels" value="true"/>
  <parameter key="use_example_weights" value="true"/>
</operator>
<!-- Performance (AUPRC): adds only the AUPRC criterion to the incoming performance vector. -->
<operator name="Performance (AUPRC)" class="operator_toolbox:performance_auprc" activated="true"
          compatibility="2.9.000" expanded="true"
          height="82" width="90" x="313" y="34">
  <parameter key="main_criterion" value="first"/>
  <parameter key="accuracy" value="false"/>
  <parameter key="AUC" value="false"/>
  <parameter key="AUPRC" value="true"/>
  <parameter key="skip_undefined_labels" value="true"/>
  <parameter key="use_example_weights" value="true"/>
</operator>
<!-- Performance (Fbeta-score): custom extension operator; F-beta with beta 2.0, positive class "True",
     and the F-beta score set as the main criterion. -->
<operator name="Performance (Fbeta-score)" class="radiomics_test:my_own_operator" activated="true"
          compatibility="1.0.000" expanded="true"
          height="82" width="90" x="447" y="34">
  <parameter key="Manually set positive class" value="true"/>
  <parameter key="Positive class" value="True"/>
  <parameter key="Make Fbeta-score the main criterion" value="true"/>
  <parameter key="Beta" value="2.0"/>
</operator>
<!-- Testing wiring: the model is applied to the test set, then the performance vector and the labelled data
     are passed through CV-D, Performance (AUPRC) and Performance (Fbeta-score) in sequence before leaving
     through "performance 1" and "test set results". -->
<connect from_port="model" to_op="Apply Model (2)" to_port="model"/>
<connect from_port="test set" to_op="Apply Model (2)" to_port="unlabelled data"/>
<connect from_op="Apply Model (2)" from_port="labelled data" to_op="CV-D" to_port="labelled data"/>
<connect from_op="CV-D" from_port="performance" to_op="Performance (AUPRC)" to_port="performance"/>
<connect from_op="CV-D" from_port="example set" to_op="Performance (AUPRC)" to_port="labelled data"/>
<connect from_op="Performance (AUPRC)" from_port="performance" to_op="Performance (Fbeta-score)" to_port="performance vector"/>
<connect from_op="Performance (AUPRC)" from_port="example set" to_op="Performance (Fbeta-score)" to_port="labelled example set"/>
<connect from_op="Performance (Fbeta-score)" from_port="performance vector" to_port="performance 1"/>
<connect from_op="Performance (Fbeta-score)" from_port="labelled example set" to_port="test set results"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_test set results" spacing="0"/>
<portSpacing port="sink_performance 1" spacing="0"/>
<portSpacing port="sink_performance 2" spacing="0"/>
</process>
</operator>
<!-- Grid wiring: input 1 feeds the cross validation; its model, test result set and performance are
     exposed as "model", "output 1" and "performance". -->
<connect from_port="input 1" to_op="Cross Validation (2)" to_port="example set"/>
<connect from_op="Cross Validation (2)" from_port="model" to_port="model"/>
<connect from_op="Cross Validation (2)" from_port="test result set" to_port="output 1"/>
<connect from_op="Cross Validation (2)" from_port="performance 1" to_port="performance"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_performance" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
</operator>
<!-- Store: writes its input to the repository entry below (relative path). -->
<operator name="Store" class="store" activated="true"
          compatibility="9.9.000" expanded="true"
          height="68" width="90" x="1117" y="85">
  <parameter key="repository_entry" value="../Models_mRMR/G_D_mRMR_LR"/>
</operator>
<operator activated="true" class="concurrency:optimize_parameters_grid" compatibility="9.9.000" expanded="true" height="145" name="without downsampling" width="90" x="715" y="187">
<!-- Grid definition for the branch without downsampling: varies the correlation threshold of
     "Remove Correlated Attributes (2)" and k of "MRMR-FS (2)".
     NOTE(review): the bracket syntax presumably reads [min;max;steps;scale]; confirm. -->
<list key="parameters">
  <parameter key="Remove Correlated Attributes (2).correlation" value="[0.4;1.0;6;linear]"/>
  <parameter key="MRMR-FS (2).k" value="[10;24;7;linear]"/>
</list>
<!-- Execution settings: abort on the first error, log the main criterion only, run in parallel. -->
<parameter key="error_handling" value="fail on error"/>
<parameter key="log_performance" value="true"/>
<parameter key="log_all_criteria" value="false"/>
<parameter key="synchronize" value="false"/>
<parameter key="enable_parallel_execution" value="true"/>
<process expanded="true">
<operator activated="true" class="concurrency:cross_validation" compatibility="9.9.000" expanded="true" height="145" name="Cross Validation" width="90" x="45" y="34">
<!-- 4-fold cross validation with automatic sampling; the local seed is not used (use_local_random_seed is false); folds may run in parallel. -->
<parameter key="split_on_batch_attribute" value="false"/>
<parameter key="leave_one_out" value="false"/>
<parameter key="number_of_folds" value="4"/>
<parameter key="sampling_type" value="automatic"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
<parameter key="enable_parallel_execution" value="true"/>
<process expanded="true">
<operator activated="true" class="subprocess" compatibility="9.9.000" expanded="true" height="82" name="Remove outliers" width="90" x="45" y="34">
<process expanded="true">
<!-- Multiply (4): duplicates the training fold; one copy stays untouched, the other is used for outlier scoring. -->
<operator name="Multiply (4)" class="multiply" activated="true"
          compatibility="9.9.000" expanded="true"
          height="103" width="90" x="45" y="34"/>
<!-- Normalize: Z-transformation over all regular attributes (special attributes excluded). -->
<operator name="Normalize" class="normalize" activated="true"
          compatibility="9.9.000" expanded="true"
          height="103" width="90" x="112" y="187">
  <parameter key="return_preprocessing_model" value="false"/>
  <parameter key="create_view" value="false"/>
  <!-- Attribute selection: everything, no exceptions, no inversion. -->
  <parameter key="attribute_filter_type" value="all"/>
  <parameter key="attribute" value=""/>
  <parameter key="attributes" value=""/>
  <parameter key="use_except_expression" value="false"/>
  <parameter key="value_type" value="numeric"/>
  <parameter key="use_value_type_exception" value="false"/>
  <parameter key="except_value_type" value="real"/>
  <parameter key="block_type" value="value_series"/>
  <parameter key="use_block_type_exception" value="false"/>
  <parameter key="except_block_type" value="value_series_end"/>
  <parameter key="invert_selection" value="false"/>
  <parameter key="include_special_attributes" value="false"/>
  <!-- Normalization method and the settings for the other methods. -->
  <parameter key="method" value="Z-transformation"/>
  <parameter key="min" value="0.0"/>
  <parameter key="max" value="1.0"/>
  <parameter key="allow_negative_values" value="false"/>
</operator>
<!-- Detect Outlier (LOF) (2): local outlier factor with MinPts bounds 10 to 20 and Euclidean distance. -->
<operator name="Detect Outlier (LOF) (2)" class="detect_outlier_lof" activated="true"
          compatibility="9.9.000" expanded="true"
          height="82" width="90" x="246" y="187">
  <parameter key="minimal_points_lower_bound" value="10"/>
  <parameter key="minimal_points_upper_bound" value="20"/>
  <parameter key="distance_function" value="euclidian distance"/>
</operator>
<!-- Execute Python (4): rm_main(ori, norm) collects the IDs of rows in "norm" whose "outlier" value is below 2
     and returns the matching rows of "ori" (looked up through the ID column).
     The script is stored in one attribute value, so its line breaks are written as character references
     and the quote and less-than characters as entities; written raw they break the XML, and with the
     line breaks lost everything after the first "#" becomes a Python comment. -->
<operator activated="true" class="python_scripting:execute_python" compatibility="9.8.000" expanded="true" height="124" name="Execute Python (4)" width="90" x="380" y="34">
  <parameter key="script" value="import pandas&#10;&#10;# rm_main is a mandatory function,&#10;# the number of arguments has to be the number of input ports (can be none),&#10;# or the number of input ports plus one if &quot;use macros&quot; parameter is set&#10;# if you want to use macros, use this instead and check &quot;use macros&quot; parameter:&#10;#def rm_main(data,macros):&#10;def rm_main(ori, norm):&#10;    ids = list(norm.loc[norm['outlier']&lt;2, 'ID'])&#10;    data = ori.set_index('ID', drop = False).loc[ids,:]&#10;    return data"/>
  <parameter key="notebook_cell_tag_filter" value=""/>
  <parameter key="use_default_python" value="true"/>
  <parameter key="package_manager" value="conda (anaconda)"/>
  <parameter key="use_macros" value="false"/>
</operator>
<!-- Set Role (5): sets "ID" to id and "Target" to label on the data returned by the Python script. -->
<operator name="Set Role (5)" class="set_role" activated="true"
          compatibility="9.9.000" expanded="true"
          height="82" width="90" x="581" y="34">
  <parameter key="attribute_name" value="ID"/>
  <parameter key="target_role" value="id"/>
  <list key="set_additional_roles">
    <parameter key="Target" value="label"/>
    <parameter key="ID" value="id"/>
  </list>
</operator>
<!-- Wiring: the input is duplicated. Copy 1 goes straight to the Python script (input 1); copy 2 is
     normalized and scored by LOF before reaching the script (input 2). The script output gets its
     roles set and leaves through "out 1". -->
<connect from_port="in 1" to_op="Multiply (4)" to_port="input"/>
<connect from_op="Multiply (4)" from_port="output 1" to_op="Execute Python (4)" to_port="input 1"/>
<connect from_op="Multiply (4)" from_port="output 2" to_op="Normalize" to_port="example set input"/>
<connect from_op="Normalize" from_port="example set output" to_op="Detect Outlier (LOF) (2)" to_port="example set input"/>
<connect from_op="Detect Outlier (LOF) (2)" from_port="example set output" to_op="Execute Python (4)" to_port="input 2"/>
<connect from_op="Execute Python (4)" from_port="output 1" to_op="Set Role (5)" to_port="example set input"/>
<connect from_op="Set Role (5)" from_port="example set output" to_port="out 1"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<!-- Remove Correlated Attributes (2): filters on absolute correlation "greater" than the threshold, visiting
     attributes in random order. The "correlation" value is also listed in the enclosing grid search's parameter list. -->
<operator name="Remove Correlated Attributes (2)" class="remove_correlated_attributes" activated="true"
          compatibility="9.9.000" expanded="true"
          height="82" width="90" x="179" y="34">
  <parameter key="correlation" value="0.2"/>
  <parameter key="filter_relation" value="greater"/>
  <parameter key="attribute_order" value="random"/>
  <parameter key="use_absolute_correlation" value="true"/>
  <parameter key="use_local_random_seed" value="false"/>
  <parameter key="local_random_seed" value="1992"/>
</operator>
<!-- MRMR-FS (2): mRMR feature selection (quotient relation, no ensemble). The "k" value is also listed in the
     enclosing grid search's parameter list. -->
<operator name="MRMR-FS (2)" class="featselext:mrmr_feature_selection" activated="true"
          compatibility="1.1.004" expanded="true"
          height="82" width="90" x="313" y="34">
  <parameter key="normalize_weights" value="false"/>
  <parameter key="sort_weights" value="false"/>
  <parameter key="sort_direction" value="ascending"/>
  <parameter key="sets_or_ranks" value="sets"/>
  <parameter key="calculate full ranking" value="true"/>
  <parameter key="k" value="100"/>
  <parameter key="relevance_redundancy_relation" value="quotient"/>
  <parameter key="use_ensemble_method" value="none"/>
  <parameter key="ensemble_size" value="10"/>
  <parameter key="logging" value="false"/>
</operator>
<!-- Logistic Regression (2) (H2O): no regularization, p-values computed, collinear columns removed,
     inputs standardized, missing values mean-imputed. -->
<operator name="Logistic Regression (2)" class="h2o:logistic_regression" activated="true"
          compatibility="9.9.000" expanded="true"
          height="124" width="90" x="581" y="34">
  <parameter key="solver" value="AUTO"/>
  <parameter key="reproducible" value="false"/>
  <parameter key="maximum_number_of_threads" value="4"/>
  <!-- Regularization and lambda search are switched off. -->
  <parameter key="use_regularization" value="false"/>
  <parameter key="lambda_search" value="false"/>
  <parameter key="number_of_lambdas" value="0"/>
  <parameter key="lambda_min_ratio" value="0.0"/>
  <!-- Early stopping after 3 rounds within a tolerance of 0.001. -->
  <parameter key="early_stopping" value="true"/>
  <parameter key="stopping_rounds" value="3"/>
  <parameter key="stopping_tolerance" value="0.001"/>
  <parameter key="standardize" value="true"/>
  <parameter key="non-negative_coefficients" value="false"/>
  <parameter key="add_intercept" value="true"/>
  <parameter key="compute_p-values" value="true"/>
  <parameter key="remove_collinear_columns" value="true"/>
  <parameter key="missing_values_handling" value="MeanImputation"/>
  <parameter key="max_iterations" value="0"/>
  <parameter key="max_runtime_seconds" value="0"/>
</operator>
<!-- Training wiring: training set, outlier removal, correlation filter, mRMR selection, logistic regression, model out. -->
<connect from_port="training set" to_op="Remove outliers" to_port="in 1"/>
<connect from_op="Remove outliers" from_port="out 1" to_op="Remove Correlated Attributes (2)" to_port="example set input"/>
<connect from_op="Remove Correlated Attributes (2)" from_port="example set output" to_op="MRMR-FS (2)" to_port="example set"/>
<connect from_op="MRMR-FS (2)" from_port="example set" to_op="Logistic Regression (2)" to_port="training set"/>
<connect from_op="Logistic Regression (2)" from_port="model" to_port="model"/>
<portSpacing port="source_training set" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_through 1" spacing="0"/>
</process>
<process expanded="true">
<!-- Apply Model: applies the fold's model to the test fold. -->
<operator name="Apply Model" class="apply_model" activated="true"
          compatibility="9.9.000" expanded="true"
          height="82" width="90" x="45" y="34">
  <list key="application_parameters"/>
  <parameter key="create_view" value="false"/>
</operator>
<!-- CV-nD: binominal performance with positive class "True"; main criterion recall;
     kappa, AUC, precision and recall are enabled, every other criterion is off. -->
<operator name="CV-nD" class="performance_binominal_classification" activated="true"
          compatibility="9.9.000" expanded="true"
          height="82" width="90" x="179" y="34">
  <parameter key="manually_set_positive_class" value="true"/>
  <parameter key="positive_class" value="True"/>
  <parameter key="main_criterion" value="recall"/>
  <!-- Criterion switches. -->
  <parameter key="accuracy" value="false"/>
  <parameter key="classification_error" value="false"/>
  <parameter key="kappa" value="true"/>
  <parameter key="AUC (optimistic)" value="false"/>
  <parameter key="AUC" value="true"/>
  <parameter key="AUC (pessimistic)" value="false"/>
  <parameter key="precision" value="true"/>
  <parameter key="recall" value="true"/>
  <parameter key="lift" value="false"/>
  <parameter key="fallout" value="false"/>
  <parameter key="f_measure" value="false"/>
  <parameter key="false_positive" value="false"/>
  <parameter key="false_negative" value="false"/>
  <parameter key="true_positive" value="false"/>
  <parameter key="true_negative" value="false"/>
  <parameter key="sensitivity" value="false"/>
  <parameter key="specificity" value="false"/>
  <parameter key="youden" value="false"/>
  <parameter key="positive_predictive_value" value="false"/>
  <parameter key="negative_predictive_value" value="false"/>
  <parameter key="psep" value="false"/>
  <!-- Handling of unlabeled examples and example weights. -->
  <parameter key="skip_undefined_labels" value="true"/>
  <parameter key="use_example_weights" value="true"/>
</operator>
<!-- Performance (AUPRC) (2): adds only the AUPRC criterion to the incoming performance vector. -->
<operator name="Performance (AUPRC) (2)" class="operator_toolbox:performance_auprc" activated="true"
          compatibility="2.9.000" expanded="true"
          height="82" width="90" x="313" y="34">
  <parameter key="main_criterion" value="first"/>
  <parameter key="accuracy" value="false"/>
  <parameter key="AUC" value="false"/>
  <parameter key="AUPRC" value="true"/>
  <parameter key="skip_undefined_labels" value="true"/>
  <parameter key="use_example_weights" value="true"/>
</operator>
<!-- Performance (Fbeta-score) (3): custom extension operator; F-beta with beta 2.0, positive class "True",
     and the F-beta score set as the main criterion. -->
<operator name="Performance (Fbeta-score) (3)" class="radiomics_test:my_own_operator" activated="true"
          compatibility="1.0.000" expanded="true"
          height="82" width="90" x="447" y="34">
  <parameter key="Manually set positive class" value="true"/>
  <parameter key="Positive class" value="True"/>
  <parameter key="Make Fbeta-score the main criterion" value="true"/>
  <parameter key="Beta" value="2.0"/>
</operator>
<!-- Testing wiring: the model is applied to the test set, then the performance vector and the labelled data
     are passed through CV-nD, Performance (AUPRC) (2) and Performance (Fbeta-score) (3) in sequence before
     leaving through "performance 1" and "test set results". -->
<connect from_port="model" to_op="Apply Model" to_port="model"/>
<connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
<connect from_op="Apply Model" from_port="labelled data" to_op="CV-nD" to_port="labelled data"/>
<connect from_op="CV-nD" from_port="performance" to_op="Performance (AUPRC) (2)" to_port="performance"/>
<connect from_op="CV-nD" from_port="example set" to_op="Performance (AUPRC) (2)" to_port="labelled data"/>
<connect from_op="Performance (AUPRC) (2)" from_port="performance" to_op="Performance (Fbeta-score) (3)" to_port="performance vector"/>
<connect from_op="Performance (AUPRC) (2)" from_port="example set" to_op="Performance (Fbeta-score) (3)" to_port="labelled example set"/>
<connect from_op="Performance (Fbeta-score) (3)" from_port="performance vector" to_port="performance 1"/>
<connect from_op="Performance (Fbeta-score) (3)" from_port="labelled example set" to_port="test set results"/>
<portSpacing port="source_model" spacing="0"/>
<portSpacing port="source_test set" spacing="0"/>
<portSpacing port="source_through 1" spacing="0"/>
<portSpacing port="sink_test set results" spacing="0"/>
<portSpacing port="sink_performance 1" spacing="0"/>
<portSpacing port="sink_performance 2" spacing="0"/>
</process>
</operator>
<!-- Grid wiring: input 1 feeds the cross validation; its model, test result set and performance are
     exposed as "model", "output 1" and "performance". -->
<connect from_port="input 1" to_op="Cross Validation" to_port="example set"/>
<connect from_op="Cross Validation" from_port="model" to_port="model"/>
<connect from_op="Cross Validation" from_port="test result set" to_port="output 1"/>
<connect from_op="Cross Validation" from_port="performance 1" to_port="performance"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_performance" spacing="0"/>
<portSpacing port="sink_model" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
</operator>
<!-- Store (2): writes its input to the repository entry below (relative path). -->
<operator name="Store (2)" class="store" activated="true"
          compatibility="9.9.000" expanded="true"
          height="68" width="90" x="1050" y="238">
  <parameter key="repository_entry" value="../Models_mRMR/G_nD_mRMR_LR"/>
</operator>
<!-- DeLong Test (AUPRC) (3): Python script that takes two data sets with "Target" and "prediction(Target)"
     columns, computes a precision-recall AUC for each, builds variance and covariance terms from the
     structural components, and prints and returns a two-tailed p-value.
     The script is stored in one attribute value, so its line breaks are written as character references
     and the less-than sign as an entity; the raw "<" and the raw line break in the original value are
     not valid there. The Python statements themselves are unchanged.
     NOTE(review): rm_main returns a plain number; confirm that Execute Python accepts a non-DataFrame
     return value for this unconnected output. -->
<operator activated="true" class="python_scripting:execute_python" compatibility="9.8.000" expanded="true" height="124" name="DeLong Test (AUPRC) (3)" width="90" x="916" y="289">
  <parameter key="script" value="import pandas&#10;import scipy.stats as st&#10;from sklearn import metrics&#10;from sklearn.metrics import precision_recall_curve&#10;from sklearn.metrics import auc&#10;&#10;def kernel(X, Y):&#10;    return .5 if Y==X else int(Y &lt; X)&#10;&#10;def structural_components(X, Y):&#10;    V10 = [1/len(Y) * sum([kernel(x, y) for y in Y]) for x in X]&#10;    V01 = [1/len(X) * sum([kernel(x, y) for x in X]) for y in Y]&#10;    return V10, V01&#10;&#10;def get_S_entry(V_A, V_B, auc_A, auc_B):&#10;    return 1/(len(V_A)-1) * sum([(a-auc_A)*(b-auc_B) for a,b in zip(V_A, V_B)])&#10;&#10;def z_score(var_A, var_B, covar_AB, auc_A, auc_B):&#10;    return (auc_A - auc_B)/((var_A + var_B - 2*covar_AB)**(.5))&#10;&#10;def group_preds_by_label(preds, actual):&#10;    X = [p for (p, a) in zip(preds, actual) if a=='True']&#10;    Y = [p for (p, a) in zip(preds, actual) if not a=='True']&#10;    return X, Y&#10;&#10;def rm_main(dataA, dataB):&#10;    preds_A = dataA.loc[:, 'prediction(Target)']&#10;    preds_B = dataB.loc[:, 'prediction(Target)']&#10;    actual_A = dataA.loc[:, 'Target']&#10;    actual_B = dataB.loc[:, 'Target']&#10;    X_A, Y_A = group_preds_by_label(preds_A, actual_A)&#10;    X_B, Y_B = group_preds_by_label(preds_B, actual_B)&#10;    V_A10, V_A01 = structural_components(X_A, Y_A)&#10;    V_B10, V_B01 = structural_components(X_B, Y_B)&#10;    a_A = [1 if elem == 'True' else 0 for elem in actual_A]&#10;    a_B = [1 if elem == 'True' else 0 for elem in actual_B]&#10;    p_A = [1 if elem == 'True' else 0 for elem in preds_A]&#10;    p_B = [1 if elem == 'True' else 0 for elem in preds_B]&#10;    precision_A, recall_A, thresholds_A = precision_recall_curve(a_A, p_A)&#10;    auc_A = auc(recall_A, precision_A)&#10;    precision_B, recall_B, thresholds_B = precision_recall_curve(a_B, p_B)&#10;    auc_B = auc(recall_B, precision_B)&#10;    # Compute entries of covariance matrix S (covar_AB = covar_BA)&#10;    var_A = (get_S_entry(V_A10, V_A10, auc_A, auc_A) * 1/len(V_A10) + get_S_entry(V_A01, V_A01, auc_A, auc_A) * 1/len(V_A01))&#10;    var_B = (get_S_entry(V_B10, V_B10, auc_B, auc_B) * 1/len(V_B10) + get_S_entry(V_B01, V_B01, auc_B, auc_B) * 1/len(V_B01))&#10;    covar_AB = (get_S_entry(V_A10, V_B10, auc_A, auc_B) * 1/len(V_A10) + get_S_entry(V_A01, V_B01, auc_A, auc_B) * 1/len(V_A01))&#10;    # Two tailed test&#10;    z = z_score(var_A, var_B, covar_AB, auc_A, auc_B)&#10;    p = st.norm.sf(abs(z))*2&#10;    print('Is AUPRC_A significantly different from AUPRC_B?')&#10;    print('CV p-value:', p)&#10;    return p"/>
  <parameter key="notebook_cell_tag_filter" value=""/>
  <parameter key="use_default_python" value="true"/>
  <parameter key="package_manager" value="conda (anaconda)"/>
  <parameter key="use_macros" value="false"/>
</operator>
<!-- Top-level wiring: the CSV is read, filtered for missing values and given roles, then duplicated.
     Copy 1 is sampled and sent to "with Downsampling"; copy 2 goes directly to "without downsampling".
     Each branch delivers its performance as a process result, its model to a Store operator, and its
     "output 1" to the DeLong test script. -->
<connect from_op="Read train (2)" from_port="output" to_op="Remove missing data" to_port="example set input"/>
<connect from_op="Remove missing data" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
<connect from_op="Set Role (2)" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
<connect from_op="Multiply (2)" from_port="output 1" to_op="Sample (3)" to_port="example set input"/>
<connect from_op="Multiply (2)" from_port="output 2" to_op="without downsampling" to_port="input 1"/>
<connect from_op="Sample (3)" from_port="example set output" to_op="with Downsampling" to_port="input 1"/>
<connect from_op="with Downsampling" from_port="performance" to_port="result 1"/>
<connect from_op="with Downsampling" from_port="model" to_op="Store" to_port="input"/>
<connect from_op="with Downsampling" from_port="output 1" to_op="DeLong Test (AUPRC) (3)" to_port="input 1"/>
<connect from_op="without downsampling" from_port="performance" to_port="result 2"/>
<connect from_op="without downsampling" from_port="model" to_op="Store (2)" to_port="input"/>
<connect from_op="without downsampling" from_port="output 1" to_op="DeLong Test (AUPRC) (3)" to_port="input 2"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
</operator>
</process>