How to change the output of Loop Labels so that each Excel file is named after its label

mansour
mansour New Altair Community Member
edited November 5 in Community Q&A
Dear All
I am running the following loop to go through multiple labels and write the output to an Excel file each time. The macro I am using, %{a}.xlsx, just names each Excel file with a number (1.xlsx, 2.xlsx, ...). I want to change this macro so that each file is named after its label, i.e. the Excel outputs would be (name of first label).xlsx, (name of second label).xlsx, ...
Thanks
<?xml version="1.0" encoding="UTF-8"?><process version="10.3.000">
<!-- RapidMiner process: retrieves the "Quartiled All data" entry, gives 25 attributes the role "label", and runs the Loop Labels subprocess, which computes information-gain weights and writes them to an Excel file. -->
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="10.3.000" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<process expanded="true">
<operator activated="true" class="retrieve" compatibility="10.3.000" expanded="true" height="68" name="Retrieve Quartiled All data" width="90" x="45" y="85">
<parameter key="repository_entry" value="Quartiled All data"/>
</operator>
<!-- Assigns the role "label" to every attribute listed in set_roles, so the data set reaching Loop Labels carries several label columns. -->
<operator activated="true" class="blending:set_role" compatibility="10.3.000" expanded="true" height="82" name="Set Role (2)" origin="GENERATED_TUTORIAL">
<list key="set_roles">
<parameter key="Hardness Bite 1 (N)" value="label"/>
<parameter key="Hardness Bite 2 (N)" value="label"/>
<parameter key="Adhesive Force (N)" value="label"/>
<parameter key="Cohesiveness" value="label"/>
<parameter key="Gumminess (N)" value="label"/>
<parameter key="Springiness (mm)" value="label"/>
<parameter key="Chewiness (N)" value="label"/>
<parameter key="Resilience" value="label"/>
<parameter key="gas amount (mol/g)" value="label"/>
<parameter key="baking loss" value="label"/>
<parameter key="Crust Colour a*" value="label"/>
<parameter key="Crust Colour b*" value="label"/>
<parameter key="Crust Colour L*" value="label"/>
<parameter key="Crust ^E*" value="label"/>
<parameter key="Crumb ^E*" value="label"/>
<parameter key="Crumb Colour a*" value="label"/>
<parameter key="Crumb Colour b*" value="label"/>
<parameter key="Crumb Colour L*" value="label"/>
<parameter key="void_fraction" value="label"/>
<parameter key="mean_area_mm2" value="label"/>
<parameter key="cell_density_cm2" value="label"/>
<parameter key="small_cell_density_cm2" value="label"/>
<parameter key="large_cell_density_cm2" value="label"/>
<parameter key="range" value="label"/>
<parameter key="mean_ecc" value="label"/>
</list>
</operator>
<!-- Subprocess chain: Replace Missing Values (default "average") -> Select Attributes -> Weight by Information Gain -> Weights to Data -> Set Role (Attribute becomes id) -> Rename (Weight becomes "Weight_Info Gain") -> Write Excel. -->
<operator activated="true" class="loop_labels" compatibility="10.3.000" expanded="true" height="82" name="Loop Labels" origin="GENERATED_TUTORIAL" width="90" x="313" y="85">
<process expanded="true">
<operator activated="true" class="replace_missing_values" compatibility="10.3.000" expanded="true" height="103" name="Replace Missing Values (4)" width="90" x="45" y="34">
<parameter key="return_preprocessing_model" value="false"/>
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="true"/>
<parameter key="default" value="average"/>
<list key="columns"/>
</operator>
<!-- NOTE(review): attribute_filter_type is "all", so the "attribute" (SCC) and "attributes" (au_scc|Vol|Prot|...) values below look like leftovers from a different data set and presumably have no effect. Confirm before relying on them. -->
<operator activated="true" class="select_attributes" compatibility="10.3.000" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
<parameter key="attribute_filter_type" value="all"/>
<parameter key="attribute" value="SCC"/>
<parameter key="attributes" value="|au_scc|Vol|Prot|Peak_Flow|Milking_Time|Lact|Fat|EC|Date"/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="numeric"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_matrix_row_start"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
<operator activated="true" class="weight_by_information_gain" compatibility="10.3.000" expanded="true" height="82" name="Weight by Information Gain" width="90" x="313" y="21">
<parameter key="normalize_weights" value="true"/>
<parameter key="sort_weights" value="true"/>
<parameter key="sort_direction" value="ascending"/>
</operator>
<operator activated="true" class="weights_to_data" compatibility="10.3.000" expanded="true" height="68" name="Weights to Data" width="90" x="447" y="34"/>
<operator activated="true" class="set_role" compatibility="5.3.013" expanded="true" height="82" name="Set Role" width="90" x="581" y="34">
<parameter key="attribute_name" value="Attribute"/>
<parameter key="target_role" value="id"/>
<list key="set_additional_roles"/>
</operator>
<operator activated="true" class="rename" compatibility="10.3.000" expanded="true" height="82" name="Rename" width="90" x="715" y="34">
<parameter key="old_name" value="Weight"/>
<parameter key="new_name" value="Weight_Info Gain"/>
<list key="rename_additional_attributes"/>
</operator>
<!-- The file name is built from the %{a} macro, which in this loop yields numbered files (1.xlsx, 2.xlsx, ...) rather than label names. To name each file after its label, a macro holding the current label name has to be set inside this subprocess before Write Excel runs and then used here in place of %{a}. -->
<operator activated="true" class="write_excel" compatibility="10.3.000" expanded="true" height="103" name="Write Excel" width="90" x="849" y="34">
<parameter key="excel_file" value="/Users/Mansour/Downloads/Maman_Mona/%{a}.xlsx"/>
<parameter key="file_format" value="xlsx"/>
<enumeration key="sheet_names"/>
<parameter key="sheet_name" value="RapidMiner Data"/>
<parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
<parameter key="number_format" value="#.0"/>
<parameter key="encoding" value="SYSTEM"/>
</operator>
<connect from_port="example set" to_op="Replace Missing Values (4)" to_port="example set input"/>
<connect from_op="Replace Missing Values (4)" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Weight by Information Gain" to_port="example set"/>
<connect from_op="Weight by Information Gain" from_port="weights" to_op="Weights to Data" to_port="attribute weights"/>
<connect from_op="Weights to Data" from_port="example set" to_op="Set Role" to_port="example set input"/>
<connect from_op="Set Role" from_port="example set output" to_op="Rename" to_port="example set input"/>
<connect from_op="Rename" from_port="example set output" to_op="Write Excel" to_port="input"/>
<connect from_op="Write Excel" from_port="through" to_port="out 1"/>
<portSpacing port="source_example set" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
</process>
</operator>
<connect from_op="Retrieve Quartiled All data" from_port="output" to_op="Set Role (2)" to_port="example set input"/>
<connect from_op="Set Role (2)" from_port="example set output" to_op="Loop Labels" to_port="example set"/>
<connect from_op="Loop Labels" from_port="out 1" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="90"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>
Tagged:

Answers

  • CKönig
    CKönig New Altair Community Member
    Hi @mansour,
    I have created an example process detailing how to extract that information. You will need an operator from the "Operator Toolbox" extension called "Extract Statistics".

    https://marketplace.rapidminer.com/UpdateServer/faces/product_details.xhtml?productId=rmx_operator_toolbox

    <?xml version="1.0" encoding="UTF-8"?><process version="10.2.000">
      <!-- Example process on the Golf sample data: marks Wind and Outlook as additional labels, then inside Loop Labels trains a Decision Tree and extracts the name of the current label attribute into the macro "currentLabel". -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="10.2.000" expanded="true" name="Process" origin="GENERATED_TUTORIAL">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <operator activated="true" class="retrieve" compatibility="10.2.000" expanded="true" height="68" name="Golf" origin="GENERATED_TUTORIAL" width="90" x="45" y="34">
            <parameter key="repository_entry" value="//Samples/data/Golf"/>
          </operator>
          <operator activated="true" breakpoints="after" class="blending:set_role" compatibility="10.2.000" expanded="true" height="82" name="Set Role (2)" origin="GENERATED_TUTORIAL" width="90" x="179" y="34">
            <list key="set_roles">
              <parameter key="Wind" value="label"/>
              <parameter key="Outlook" value="label"/>
            </list>
            <description align="center" color="transparent" colored="false" width="126">Set additional labels so the dataset now contains 3 columns with role &amp;quot;label&amp;quot;</description>
          </operator>
          <operator activated="true" class="loop_labels" compatibility="10.2.000" expanded="true" height="103" name="Loop Labels" origin="GENERATED_TUTORIAL" width="90" x="313" y="34">
            <process expanded="true">
              <!-- Duplicates the incoming example set: output 1 feeds the Decision Tree (the modelling branch), output 2 feeds the label-name extraction branch (Extract Statistics -> Filter Examples -> Extract Macro). -->
              <operator activated="true" class="multiply" compatibility="10.2.000" expanded="true" height="103" name="Multiply" width="90" x="45" y="34"/>
              <operator activated="true" class="operator_toolbox:extract_statistics" compatibility="2.16.000" expanded="true" height="82" name="Extract Statistics" width="90" x="179" y="187">
                <parameter key="attribute_filter_type" value="all"/>
                <parameter key="attribute" value=""/>
                <parameter key="attributes" value=""/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="attribute_value"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="time"/>
                <parameter key="block_type" value="attribute_block"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="value_matrix_row_start"/>
                <parameter key="invert_selection" value="false"/>
                <parameter key="include_special_attributes" value="false"/>
                <description align="center" color="transparent" colored="false" width="126">get the &amp;quot;meta information&amp;quot; about the example set</description>
              </operator>
              <operator activated="true" class="filter_examples" compatibility="10.2.000" expanded="true" height="103" name="Filter Examples" width="90" x="313" y="187">
                <parameter key="parameter_expression" value=""/>
                <parameter key="condition_class" value="custom_filters"/>
                <parameter key="invert_filter" value="false"/>
                <list key="filters_list">
                  <parameter key="filters_entry_key" value="Role.equals.label"/>
                </list>
                <parameter key="filters_logic_and" value="true"/>
                <parameter key="filters_check_metadata" value="true"/>
                <description align="center" color="transparent" colored="false" width="126">only keep info about role &amp;quot;label&amp;quot;</description>
              </operator>
              <!-- Stores the value of column "Name" at example index 1 of the filtered statistics table in the macro "currentLabel". The example set leaving this operator is that statistics table, not the original data, so operators that need the original data and its label should be fed from a Multiply output instead of from here. -->
              <operator activated="true" class="extract_macro" compatibility="10.2.000" expanded="true" height="68" name="Extract Macro" width="90" x="447" y="187">
                <parameter key="macro" value="currentLabel"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="statistics" value="average"/>
                <parameter key="attribute_name" value="Name"/>
                <parameter key="example_index" value="1"/>
                <list key="additional_macros"/>
                <description align="center" color="transparent" colored="false" width="126">Get the name of the attribute with role &amp;quot;label&amp;quot;</description>
              </operator>
              <operator activated="true" class="concurrency:parallel_decision_tree" compatibility="10.2.000" expanded="true" height="103" name="Decision Tree" origin="GENERATED_TUTORIAL" width="90" x="447" y="34">
                <parameter key="criterion" value="gain_ratio"/>
                <parameter key="maximal_depth" value="10"/>
                <parameter key="apply_pruning" value="true"/>
                <parameter key="confidence" value="0.1"/>
                <parameter key="apply_prepruning" value="true"/>
                <parameter key="minimal_gain" value="0.01"/>
                <parameter key="minimal_leaf_size" value="2"/>
                <parameter key="minimal_size_for_split" value="4"/>
                <parameter key="number_of_prepruning_alternatives" value="3"/>
              </operator>
              <operator activated="true" class="delay" compatibility="10.2.000" expanded="true" height="103" name="Delay" width="90" x="581" y="34">
                <parameter key="delay" value="fixed"/>
                <parameter key="delay_amount" value="0"/>
                <parameter key="min_delay_amount" value="0"/>
                <parameter key="max_delay_amount" value="1000"/>
                <description align="center" color="transparent" colored="false" width="126">explicitly make sure the execution order is correct and the macro already exists</description>
              </operator>
              <operator activated="true" class="annotate" compatibility="10.2.000" expanded="true" height="68" name="Annotate" width="90" x="715" y="34">
                <list key="annotations">
                  <parameter key="Source" value="%{currentLabel}"/>
                </list>
                <parameter key="duplicate_annotations" value="overwrite"/>
                <description align="center" color="transparent" colored="false" width="126">annotate the model with the current label attribute name as &amp;quot;Source&amp;quot;</description>
              </operator>
              <!-- NOTE(review): this operator has no input connection, so nothing in the wiring forces it to run after Extract Macro; presumably the operator order within this subprocess does. Confirm before reordering operators. -->
              <operator activated="true" class="utility:create_exampleset" compatibility="10.2.000" expanded="true" height="68" name="Create ExampleSet" width="90" x="849" y="187">
                <parameter key="generator_type" value="attribute functions"/>
                <parameter key="number_of_examples" value="1"/>
                <parameter key="use_stepsize" value="false"/>
                <list key="function_descriptions">
                  <parameter key="CurrentLabel" value="%{currentLabel}"/>
                </list>
                <parameter key="add_id_attribute" value="false"/>
                <list key="numeric_series_configuration"/>
                <list key="date_series_configuration"/>
                <list key="date_series_configuration (interval)"/>
                <parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
                <parameter key="time_zone" value="SYSTEM"/>
                <parameter key="column_separator" value=","/>
                <parameter key="parse_all_as_nominal" value="false"/>
                <parameter key="decimal_point_character" value="."/>
                <parameter key="trim_attribute_names" value="true"/>
                <description align="center" color="transparent" colored="false" width="126">use the macro to generate a new example set just containing the name of the current label</description>
              </operator>
              <connect from_port="example set" to_op="Multiply" to_port="input"/>
              <connect from_op="Multiply" from_port="output 1" to_op="Decision Tree" to_port="training set"/>
              <connect from_op="Multiply" from_port="output 2" to_op="Extract Statistics" to_port="example set input"/>
              <connect from_op="Extract Statistics" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
              <connect from_op="Filter Examples" from_port="example set output" to_op="Extract Macro" to_port="example set"/>
              <connect from_op="Extract Macro" from_port="example set" to_op="Delay" to_port="through 2"/>
              <connect from_op="Decision Tree" from_port="model" to_op="Delay" to_port="through 1"/>
              <connect from_op="Delay" from_port="through 1" to_op="Annotate" to_port="input"/>
              <connect from_op="Annotate" from_port="output" to_port="out 1"/>
              <connect from_op="Create ExampleSet" from_port="output" to_port="out 2"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
              <portSpacing port="sink_out 3" spacing="0"/>
              <description align="center" color="yellow" colored="false" height="213" resized="true" width="423" x="142" y="155">extract label information into a macro</description>
            </process>
          </operator>
          <connect from_op="Golf" from_port="output" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_op="Loop Labels" to_port="example set"/>
          <connect from_op="Loop Labels" from_port="out 1" to_port="result 1"/>
          <connect from_op="Loop Labels" from_port="out 2" to_port="result 2"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
    

    (You can find the Annotations in the Result view)

  • mansour
    mansour New Altair Community Member
    CKönig
    Many thanks. I want the output to be saved as an Excel file for further analysis
    Best wishes
    Mansour
  • CKönig
    CKönig New Altair Community Member
    Yes, I understand. Just take the yellow part of my example and put it at the beginning of your "Loop Labels" subprocess. Then you can use %{currentLabel}.xlsx as the file name.

  • mansour
    mansour New Altair Community Member
    CKönig 
    I tried it, but the label is not passed on to the next part of the process, and I receive an error saying that no label is defined.
    Regards.
    Mansour