Hi,
I am pretty new to RapidMiner, so I probably just misunderstand the "Report" operator in combination with the "Performance Vector". I searched the forum without success so far, even though I think this MUST have been discussed somewhere else.
My problem is the following: I am playing around with a decision tree (DT) to create some rules and then test them on some test data. However, writing the performance results to an Excel file does not work properly.
Sample:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.2.008) with four top-level operators wired in a chain:
    Retrieve -> Validation -> Generate Report -> Report -> result 1
  The performance output of the validation is passed through "Generate Report"
  and handed to "Report" as the reportable object.
-->
<process version="5.2.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.2.008"
            expanded="true"
            name="Process">
    <parameter key="resultfile" value="C:\Users\staender.TK\Desktop\testResult"/>
    <process expanded="true" height="487" width="1118">

      <!-- Reads the repository entry //Samples/data/Golf. -->
      <operator activated="true"
                class="retrieve"
                compatibility="5.2.008"
                expanded="true"
                height="60"
                name="Retrieve"
                width="90"
                x="179"
                y="30">
        <parameter key="repository_entry" value="//Samples/data/Golf"/>
      </operator>

      <!-- x_validation operator with number_of_validations set to 2; it holds two inner processes. -->
      <operator activated="true"
                class="x_validation"
                compatibility="5.2.008"
                expanded="true"
                height="112"
                name="Validation"
                width="90"
                x="313"
                y="30">
        <parameter key="number_of_validations" value="2"/>

        <!-- First inner process: the "training" port feeds a decision tree, whose model is delivered on "model". -->
        <process expanded="true" height="463" width="529">
          <operator activated="true"
                    class="decision_tree"
                    compatibility="5.2.008"
                    expanded="true"
                    height="76"
                    name="Decision Tree"
                    width="90"
                    x="224"
                    y="30"/>
          <connect from_port="training" to_op="Decision Tree" to_port="training set"/>
          <connect from_op="Decision Tree" from_port="model" to_port="model"/>
          <portSpacing port="source_training" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
        </process>

        <!--
          Second inner process: the model is applied to the "test set" input and the
          labelled result goes to a classification performance operator
          (classification_error switched on). Its performance output is delivered
          on "averagable 1".
        -->
        <process expanded="true" height="463" width="529">
          <operator activated="true"
                    class="apply_model"
                    compatibility="5.2.008"
                    expanded="true"
                    height="76"
                    name="Apply Model"
                    width="90"
                    x="45"
                    y="30">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true"
                    class="performance_classification"
                    compatibility="5.2.008"
                    expanded="true"
                    height="76"
                    name="Performance"
                    width="90"
                    x="186"
                    y="29">
            <parameter key="classification_error" value="true"/>
            <list key="class_weights"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_averagable 1" spacing="0"/>
          <portSpacing port="sink_averagable 2" spacing="0"/>
        </process>
      </operator>

      <!--
        Declares the report "TreeTest" with format "Excel" and the Excel target
        c:\temp\extest.xls. HTML and PDF targets are also configured here.
        NOTE(review): presumably only the target matching "format" is written; confirm.
      -->
      <operator activated="true"
                class="reporting:generate_report"
                compatibility="5.2.000"
                expanded="true"
                height="76"
                name="Generate Report"
                width="90"
                x="514"
                y="30">
        <parameter key="report_name" value="TreeTest"/>
        <parameter key="format" value="Excel"/>
        <parameter key="html_output_directory" value="C:\temp"/>
        <parameter key="pdf_output_file" value="C:\temp\test.pdf"/>
        <parameter key="excel_output_file" value="c:\temp\extest.xls"/>
        <parameter key="pdf_template_file" value="C:\Program Files\Rapid-I\RapidMiner5\no file selected"/>
        <parameter key="image_template_file" value="C:\Program Files\Rapid-I\RapidMiner5\no file selected"/>
        <parameter key="set_background_color" value="true"/>
        <parameter key="section_one_font" value="courier"/>
        <parameter key="section_two_font" value="courier"/>
        <parameter key="section_three_font" value="courier"/>
        <parameter key="section_four_font" value="courier"/>
        <parameter key="section_five_font" value="courier"/>
        <parameter key="text_content_font" value="courier"/>
      </operator>

      <!--
        Refers to the report "TreeTest" by name, with the item header
        "Report - Performance Vector", reportable type "Performance Vector"
        and renderer "Table / Plot View".
      -->
      <operator activated="true"
                class="reporting:report"
                compatibility="5.2.000"
                expanded="true"
                height="60"
                name="Report"
                width="90"
                x="855"
                y="30">
        <parameter key="report_name" value="TreeTest"/>
        <parameter key="report_item_header" value="Report - Performance Vector"/>
        <parameter key="specified" value="true"/>
        <parameter key="reportable_type" value="Performance Vector"/>
        <parameter key="renderer_name" value="Table / Plot View"/>
        <list key="parameters"/>
      </operator>

      <!-- Top-level wiring between the four operators and the process result port. -->
      <connect from_op="Retrieve" from_port="output" to_op="Validation" to_port="training"/>
      <connect from_op="Validation" from_port="averagable 1" to_op="Generate Report" to_port="through 1"/>
      <connect from_op="Generate Report" from_port="through 1" to_op="Report" to_port="reportable in"/>
      <connect from_op="Report" from_port="reportable out" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
This sample produces an Excel file with two cells:
Report - Performance Vector yes: 4 7 |
That's all. Exporting to PDF instead shows the accuracy, the confusion matrix, etc.:
Report - Performance Vector accuracy: 57.14% +/- 14.29% (mikro: 57.14%) ConfusionMatrix: True: no yes no: 1 2 yes: 4 7 |
Is this a bug, or just a misunderstanding on my part?
Thanks in advance,
Marcus