Log Loss/Cross Entropy
I'm trying to understand how RapidMiner computes Cross Entropy. I set up a basic process and asked RM to compute Cross Entropy; the process is attached below. But when I compute cross-entropy by hand (Excel) or in other programs (R/Python), I get a different number from the one RM reports.
Cross Entropy = -[ y ln(p) + (1 - y) ln(1 - p) ]
RM Cross Entropy = 0.422
Excel/R/Python Cross Entropy = 0.3135
Google sheet:
https://docs.google.com/spreadsheets/d/1o1r3VgsrJxe4R27SV23WmUy7JVGDFIEMAK3riU-TxV0/edit?usp=sharing
Am I missing something basic?
Thanks in advance for any help,
\E
<?xml version="1.0" encoding="UTF-8"?>
<process version="9.0.001">
  <!--
    RapidMiner process: retrieve the Golf sample data, fit a logistic
    regression on it, apply the resulting model, and report cross-entropy
    through the Performance operator.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="9.0.001"
            expanded="true"
            name="Process">
    <process expanded="true">

      <!-- Step 1: read the example set stored at //Samples/data/Golf. -->
      <operator activated="true"
                class="retrieve"
                compatibility="9.0.001"
                expanded="true"
                height="68"
                name="Retrieve Golf"
                width="90"
                x="179"
                y="136">
        <parameter key="repository_entry" value="//Samples/data/Golf"/>
      </operator>

      <!-- Step 2: logistic regression (operator class h2o:logistic_regression);
           no parameters are set explicitly in this process. -->
      <operator activated="true"
                class="h2o:logistic_regression"
                compatibility="9.0.000"
                expanded="true"
                height="124"
                name="Logistic Regression"
                width="90"
                x="380"
                y="136"/>

      <!-- Step 3: apply the model. Its data input is wired from the
           "exampleSet" port of Logistic Regression (see the connections
           below); presumably that is the training data passed through,
           which would mean the model is scored on the data it was fitted
           on. TODO confirm. -->
      <operator activated="true"
                class="apply_model"
                compatibility="9.0.001"
                expanded="true"
                height="82"
                name="Apply Model"
                width="90"
                x="581"
                y="136">
        <list key="application_parameters"/>
      </operator>

      <!-- Step 4: classification performance with the cross-entropy
           criterion switched on.
           NOTE(review): the logarithm base and the averaging this operator
           uses are not visible in this file; check the operator
           documentation before comparing its value with a natural-log
           hand calculation. -->
      <operator activated="true"
                class="performance_classification"
                compatibility="9.0.001"
                expanded="true"
                height="82"
                name="Performance"
                width="90"
                x="715"
                y="34">
        <parameter key="cross-entropy" value="true"/>
        <list key="class_weights"/>
      </operator>

      <!-- Wiring: data flows Retrieve Golf, Logistic Regression, Apply Model,
           Performance; the two Performance outputs become the process
           results. -->
      <connect from_op="Retrieve Golf" from_port="output"
               to_op="Logistic Regression" to_port="training set"/>
      <connect from_op="Logistic Regression" from_port="model"
               to_op="Apply Model" to_port="model"/>
      <connect from_op="Logistic Regression" from_port="exampleSet"
               to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Apply Model" from_port="labelled data"
               to_op="Performance" to_port="labelled data"/>
      <connect from_op="Performance" from_port="performance"
               to_port="result 1"/>
      <connect from_op="Performance" from_port="example set"
               to_port="result 2"/>

      <!-- Layout hints for the process canvas ports. -->
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>